cookies.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088
  1. import base64
  2. import contextlib
  3. import http.cookiejar
  4. import http.cookies
  5. import json
  6. import os
  7. import re
  8. import shutil
  9. import struct
  10. import subprocess
  11. import sys
  12. import tempfile
  13. import time
  14. from datetime import datetime, timedelta, timezone
  15. from enum import Enum, auto
  16. from hashlib import pbkdf2_hmac
  17. from .aes import (
  18. aes_cbc_decrypt_bytes,
  19. aes_gcm_decrypt_and_verify_bytes,
  20. unpad_pkcs7,
  21. )
  22. from .dependencies import (
  23. _SECRETSTORAGE_UNAVAILABLE_REASON,
  24. secretstorage,
  25. sqlite3,
  26. )
  27. from .minicurses import MultilinePrinter, QuietMultilinePrinter
  28. from .utils import (
  29. Popen,
  30. YoutubeDLCookieJar,
  31. error_to_str,
  32. expand_path,
  33. is_path_like,
  34. try_call,
  35. )
  36. CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
  37. SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
  38. class YDLLogger:
  39. def __init__(self, ydl=None):
  40. self._ydl = ydl
  41. def debug(self, message):
  42. if self._ydl:
  43. self._ydl.write_debug(message)
  44. def info(self, message):
  45. if self._ydl:
  46. self._ydl.to_screen(f'[Cookies] {message}')
  47. def warning(self, message, only_once=False):
  48. if self._ydl:
  49. self._ydl.report_warning(message, only_once)
  50. def error(self, message):
  51. if self._ydl:
  52. self._ydl.report_error(message)
  53. class ProgressBar(MultilinePrinter):
  54. _DELAY, _timer = 0.1, 0
  55. def print(self, message):
  56. if time.time() - self._timer > self._DELAY:
  57. self.print_at_line(f'[Cookies] {message}', 0)
  58. self._timer = time.time()
  59. def progress_bar(self):
  60. """Return a context manager with a print method. (Optional)"""
  61. # Do not print to files/pipes, loggers, or when --no-progress is used
  62. if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
  63. return
  64. file = self._ydl._out_files.error
  65. try:
  66. if not file.isatty():
  67. return
  68. except BaseException:
  69. return
  70. return self.ProgressBar(file, preserve_output=False)
  71. def _create_progress_bar(logger):
  72. if hasattr(logger, 'progress_bar'):
  73. printer = logger.progress_bar()
  74. if printer:
  75. return printer
  76. printer = QuietMultilinePrinter()
  77. printer.print = lambda _: None
  78. return printer
  79. def load_cookies(cookie_file, browser_specification, ydl):
  80. cookie_jars = []
  81. if browser_specification is not None:
  82. browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
  83. cookie_jars.append(
  84. extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
  85. if cookie_file is not None:
  86. is_filename = is_path_like(cookie_file)
  87. if is_filename:
  88. cookie_file = expand_path(cookie_file)
  89. jar = YoutubeDLCookieJar(cookie_file)
  90. if not is_filename or os.access(cookie_file, os.R_OK):
  91. jar.load(ignore_discard=True, ignore_expires=True)
  92. cookie_jars.append(jar)
  93. return _merge_cookie_jars(cookie_jars)
  94. def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
  95. if browser_name == 'firefox':
  96. return _extract_firefox_cookies(profile, container, logger)
  97. elif browser_name == 'safari':
  98. return _extract_safari_cookies(profile, logger)
  99. elif browser_name in CHROMIUM_BASED_BROWSERS:
  100. return _extract_chrome_cookies(browser_name, profile, keyring, logger)
  101. else:
  102. raise ValueError(f'unknown browser: {browser_name}')
  103. def _extract_firefox_cookies(profile, container, logger):
  104. logger.info('Extracting cookies from firefox')
  105. if not sqlite3:
  106. logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
  107. 'Please use a python interpreter compiled with sqlite3 support')
  108. return YoutubeDLCookieJar()
  109. if profile is None:
  110. search_root = _firefox_browser_dir()
  111. elif _is_path(profile):
  112. search_root = profile
  113. else:
  114. search_root = os.path.join(_firefox_browser_dir(), profile)
  115. cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
  116. if cookie_database_path is None:
  117. raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
  118. logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
  119. container_id = None
  120. if container not in (None, 'none'):
  121. containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
  122. if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
  123. raise FileNotFoundError(f'could not read containers.json in {search_root}')
  124. with open(containers_path) as containers:
  125. identities = json.load(containers).get('identities', [])
  126. container_id = next((context.get('userContextId') for context in identities if container in (
  127. context.get('name'),
  128. try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
  129. )), None)
  130. if not isinstance(container_id, int):
  131. raise ValueError(f'could not find firefox container "{container}" in containers.json')
  132. with tempfile.TemporaryDirectory(prefix='hypervideo_dl') as tmpdir:
  133. cursor = None
  134. try:
  135. cursor = _open_database_copy(cookie_database_path, tmpdir)
  136. if isinstance(container_id, int):
  137. logger.debug(
  138. f'Only loading cookies from firefox container "{container}", ID {container_id}')
  139. cursor.execute(
  140. 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
  141. (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
  142. elif container == 'none':
  143. logger.debug('Only loading cookies not belonging to any container')
  144. cursor.execute(
  145. 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
  146. else:
  147. cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
  148. jar = YoutubeDLCookieJar()
  149. with _create_progress_bar(logger) as progress_bar:
  150. table = cursor.fetchall()
  151. total_cookie_count = len(table)
  152. for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
  153. progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
  154. cookie = http.cookiejar.Cookie(
  155. version=0, name=name, value=value, port=None, port_specified=False,
  156. domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
  157. path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
  158. comment=None, comment_url=None, rest={})
  159. jar.set_cookie(cookie)
  160. logger.info(f'Extracted {len(jar)} cookies from firefox')
  161. return jar
  162. finally:
  163. if cursor is not None:
  164. cursor.connection.close()
  165. def _firefox_browser_dir():
  166. if sys.platform in ('cygwin', 'win32'):
  167. return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
  168. elif sys.platform == 'darwin':
  169. return os.path.expanduser('~/Library/Application Support/Firefox')
  170. return os.path.expanduser('~/.mozilla/firefox')
  171. def _get_chromium_based_browser_settings(browser_name):
  172. # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
  173. if sys.platform in ('cygwin', 'win32'):
  174. appdata_local = os.path.expandvars('%LOCALAPPDATA%')
  175. appdata_roaming = os.path.expandvars('%APPDATA%')
  176. browser_dir = {
  177. 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
  178. 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'),
  179. 'chromium': os.path.join(appdata_local, R'Chromium\User Data'),
  180. 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
  181. 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
  182. 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
  183. }[browser_name]
  184. elif sys.platform == 'darwin':
  185. appdata = os.path.expanduser('~/Library/Application Support')
  186. browser_dir = {
  187. 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'),
  188. 'chrome': os.path.join(appdata, 'Google/Chrome'),
  189. 'chromium': os.path.join(appdata, 'Chromium'),
  190. 'edge': os.path.join(appdata, 'Microsoft Edge'),
  191. 'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
  192. 'vivaldi': os.path.join(appdata, 'Vivaldi'),
  193. }[browser_name]
  194. else:
  195. config = _config_home()
  196. browser_dir = {
  197. 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
  198. 'chrome': os.path.join(config, 'google-chrome'),
  199. 'chromium': os.path.join(config, 'chromium'),
  200. 'edge': os.path.join(config, 'microsoft-edge'),
  201. 'opera': os.path.join(config, 'opera'),
  202. 'vivaldi': os.path.join(config, 'vivaldi'),
  203. }[browser_name]
  204. # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
  205. # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
  206. keyring_name = {
  207. 'brave': 'Brave',
  208. 'chrome': 'Chrome',
  209. 'chromium': 'Chromium',
  210. 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
  211. 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
  212. 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
  213. }[browser_name]
  214. browsers_without_profiles = {'opera'}
  215. return {
  216. 'browser_dir': browser_dir,
  217. 'keyring_name': keyring_name,
  218. 'supports_profiles': browser_name not in browsers_without_profiles
  219. }
  220. def _extract_chrome_cookies(browser_name, profile, keyring, logger):
  221. logger.info(f'Extracting cookies from {browser_name}')
  222. if not sqlite3:
  223. logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
  224. 'Please use a python interpreter compiled with sqlite3 support')
  225. return YoutubeDLCookieJar()
  226. config = _get_chromium_based_browser_settings(browser_name)
  227. if profile is None:
  228. search_root = config['browser_dir']
  229. elif _is_path(profile):
  230. search_root = profile
  231. config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
  232. else:
  233. if config['supports_profiles']:
  234. search_root = os.path.join(config['browser_dir'], profile)
  235. else:
  236. logger.error(f'{browser_name} does not support profiles')
  237. search_root = config['browser_dir']
  238. cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
  239. if cookie_database_path is None:
  240. raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
  241. logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
  242. decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
  243. with tempfile.TemporaryDirectory(prefix='hypervideo_dl') as tmpdir:
  244. cursor = None
  245. try:
  246. cursor = _open_database_copy(cookie_database_path, tmpdir)
  247. cursor.connection.text_factory = bytes
  248. column_names = _get_column_names(cursor, 'cookies')
  249. secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
  250. cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
  251. jar = YoutubeDLCookieJar()
  252. failed_cookies = 0
  253. unencrypted_cookies = 0
  254. with _create_progress_bar(logger) as progress_bar:
  255. table = cursor.fetchall()
  256. total_cookie_count = len(table)
  257. for i, line in enumerate(table):
  258. progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
  259. is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
  260. if not cookie:
  261. failed_cookies += 1
  262. continue
  263. elif not is_encrypted:
  264. unencrypted_cookies += 1
  265. jar.set_cookie(cookie)
  266. if failed_cookies > 0:
  267. failed_message = f' ({failed_cookies} could not be decrypted)'
  268. else:
  269. failed_message = ''
  270. logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
  271. counts = decryptor._cookie_counts.copy()
  272. counts['unencrypted'] = unencrypted_cookies
  273. logger.debug(f'cookie version breakdown: {counts}')
  274. return jar
  275. finally:
  276. if cursor is not None:
  277. cursor.connection.close()
  278. def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
  279. host_key = host_key.decode()
  280. name = name.decode()
  281. value = value.decode()
  282. path = path.decode()
  283. is_encrypted = not value and encrypted_value
  284. if is_encrypted:
  285. value = decryptor.decrypt(encrypted_value)
  286. if value is None:
  287. return is_encrypted, None
  288. return is_encrypted, http.cookiejar.Cookie(
  289. version=0, name=name, value=value, port=None, port_specified=False,
  290. domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
  291. path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
  292. comment=None, comment_url=None, rest={})
  293. class ChromeCookieDecryptor:
  294. """
  295. Overview:
  296. Linux:
  297. - cookies are either v10 or v11
  298. - v10: AES-CBC encrypted with a fixed key
  299. - v11: AES-CBC encrypted with an OS protected key (keyring)
  300. - v11 keys can be stored in various places depending on the activate desktop environment [2]
  301. Mac:
  302. - cookies are either v10 or not v10
  303. - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
  304. - not v10: 'old data' stored as plaintext
  305. Windows:
  306. - cookies are either v10 or not v10
  307. - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
  308. - not v10: encrypted with DPAPI
  309. Sources:
  310. - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
  311. - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc
  312. - KeyStorageLinux::CreateService
  313. """
  314. _cookie_counts = {}
  315. def decrypt(self, encrypted_value):
  316. raise NotImplementedError('Must be implemented by sub classes')
  317. def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
  318. if sys.platform == 'darwin':
  319. return MacChromeCookieDecryptor(browser_keyring_name, logger)
  320. elif sys.platform in ('win32', 'cygwin'):
  321. return WindowsChromeCookieDecryptor(browser_root, logger)
  322. return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
  323. class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
  324. def __init__(self, browser_keyring_name, logger, *, keyring=None):
  325. self._logger = logger
  326. self._v10_key = self.derive_key(b'peanuts')
  327. password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
  328. self._v11_key = None if password is None else self.derive_key(password)
  329. self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
  330. @staticmethod
  331. def derive_key(password):
  332. # values from
  333. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
  334. return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
  335. def decrypt(self, encrypted_value):
  336. version = encrypted_value[:3]
  337. ciphertext = encrypted_value[3:]
  338. if version == b'v10':
  339. self._cookie_counts['v10'] += 1
  340. return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
  341. elif version == b'v11':
  342. self._cookie_counts['v11'] += 1
  343. if self._v11_key is None:
  344. self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
  345. return None
  346. return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
  347. else:
  348. self._cookie_counts['other'] += 1
  349. return None
  350. class MacChromeCookieDecryptor(ChromeCookieDecryptor):
  351. def __init__(self, browser_keyring_name, logger):
  352. self._logger = logger
  353. password = _get_mac_keyring_password(browser_keyring_name, logger)
  354. self._v10_key = None if password is None else self.derive_key(password)
  355. self._cookie_counts = {'v10': 0, 'other': 0}
  356. @staticmethod
  357. def derive_key(password):
  358. # values from
  359. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
  360. return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
  361. def decrypt(self, encrypted_value):
  362. version = encrypted_value[:3]
  363. ciphertext = encrypted_value[3:]
  364. if version == b'v10':
  365. self._cookie_counts['v10'] += 1
  366. if self._v10_key is None:
  367. self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
  368. return None
  369. return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
  370. else:
  371. self._cookie_counts['other'] += 1
  372. # other prefixes are considered 'old data' which were stored as plaintext
  373. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
  374. return encrypted_value
  375. class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
  376. def __init__(self, browser_root, logger):
  377. self._logger = logger
  378. self._v10_key = _get_windows_v10_key(browser_root, logger)
  379. self._cookie_counts = {'v10': 0, 'other': 0}
  380. def decrypt(self, encrypted_value):
  381. version = encrypted_value[:3]
  382. ciphertext = encrypted_value[3:]
  383. if version == b'v10':
  384. self._cookie_counts['v10'] += 1
  385. if self._v10_key is None:
  386. self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
  387. return None
  388. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
  389. # kNonceLength
  390. nonce_length = 96 // 8
  391. # boringssl
  392. # EVP_AEAD_AES_GCM_TAG_LEN
  393. authentication_tag_length = 16
  394. raw_ciphertext = ciphertext
  395. nonce = raw_ciphertext[:nonce_length]
  396. ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
  397. authentication_tag = raw_ciphertext[-authentication_tag_length:]
  398. return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
  399. else:
  400. self._cookie_counts['other'] += 1
  401. # any other prefix means the data is DPAPI encrypted
  402. # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
  403. return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()
  404. def _extract_safari_cookies(profile, logger):
  405. if profile is not None:
  406. logger.error('safari does not support profiles')
  407. if sys.platform != 'darwin':
  408. raise ValueError(f'unsupported platform: {sys.platform}')
  409. cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
  410. if not os.path.isfile(cookies_path):
  411. logger.debug('Trying secondary cookie location')
  412. cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
  413. if not os.path.isfile(cookies_path):
  414. raise FileNotFoundError('could not find safari cookies database')
  415. with open(cookies_path, 'rb') as f:
  416. cookies_data = f.read()
  417. jar = parse_safari_cookies(cookies_data, logger=logger)
  418. logger.info(f'Extracted {len(jar)} cookies from safari')
  419. return jar
  420. class ParserError(Exception):
  421. pass
  422. class DataParser:
  423. def __init__(self, data, logger):
  424. self._data = data
  425. self.cursor = 0
  426. self._logger = logger
  427. def read_bytes(self, num_bytes):
  428. if num_bytes < 0:
  429. raise ParserError(f'invalid read of {num_bytes} bytes')
  430. end = self.cursor + num_bytes
  431. if end > len(self._data):
  432. raise ParserError('reached end of input')
  433. data = self._data[self.cursor:end]
  434. self.cursor = end
  435. return data
  436. def expect_bytes(self, expected_value, message):
  437. value = self.read_bytes(len(expected_value))
  438. if value != expected_value:
  439. raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')
  440. def read_uint(self, big_endian=False):
  441. data_format = '>I' if big_endian else '<I'
  442. return struct.unpack(data_format, self.read_bytes(4))[0]
  443. def read_double(self, big_endian=False):
  444. data_format = '>d' if big_endian else '<d'
  445. return struct.unpack(data_format, self.read_bytes(8))[0]
  446. def read_cstring(self):
  447. buffer = []
  448. while True:
  449. c = self.read_bytes(1)
  450. if c == b'\x00':
  451. return b''.join(buffer).decode()
  452. else:
  453. buffer.append(c)
  454. def skip(self, num_bytes, description='unknown'):
  455. if num_bytes > 0:
  456. self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
  457. elif num_bytes < 0:
  458. raise ParserError(f'invalid skip of {num_bytes} bytes')
  459. def skip_to(self, offset, description='unknown'):
  460. self.skip(offset - self.cursor, description)
  461. def skip_to_end(self, description='unknown'):
  462. self.skip_to(len(self._data), description)
  463. def _mac_absolute_time_to_posix(timestamp):
  464. return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
  465. def _parse_safari_cookies_header(data, logger):
  466. p = DataParser(data, logger)
  467. p.expect_bytes(b'cook', 'database signature')
  468. number_of_pages = p.read_uint(big_endian=True)
  469. page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)]
  470. return page_sizes, p.cursor
  471. def _parse_safari_cookies_page(data, jar, logger):
  472. p = DataParser(data, logger)
  473. p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
  474. number_of_cookies = p.read_uint()
  475. record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
  476. if number_of_cookies == 0:
  477. logger.debug(f'a cookies page of size {len(data)} has no cookies')
  478. return
  479. p.skip_to(record_offsets[0], 'unknown page header field')
  480. with _create_progress_bar(logger) as progress_bar:
  481. for i, record_offset in enumerate(record_offsets):
  482. progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
  483. p.skip_to(record_offset, 'space between records')
  484. record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
  485. p.read_bytes(record_length)
  486. p.skip_to_end('space in between pages')
  487. def _parse_safari_cookies_record(data, jar, logger):
  488. p = DataParser(data, logger)
  489. record_size = p.read_uint()
  490. p.skip(4, 'unknown record field 1')
  491. flags = p.read_uint()
  492. is_secure = bool(flags & 0x0001)
  493. p.skip(4, 'unknown record field 2')
  494. domain_offset = p.read_uint()
  495. name_offset = p.read_uint()
  496. path_offset = p.read_uint()
  497. value_offset = p.read_uint()
  498. p.skip(8, 'unknown record field 3')
  499. expiration_date = _mac_absolute_time_to_posix(p.read_double())
  500. _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841
  501. try:
  502. p.skip_to(domain_offset)
  503. domain = p.read_cstring()
  504. p.skip_to(name_offset)
  505. name = p.read_cstring()
  506. p.skip_to(path_offset)
  507. path = p.read_cstring()
  508. p.skip_to(value_offset)
  509. value = p.read_cstring()
  510. except UnicodeDecodeError:
  511. logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
  512. return record_size
  513. p.skip_to(record_size, 'space at the end of the record')
  514. cookie = http.cookiejar.Cookie(
  515. version=0, name=name, value=value, port=None, port_specified=False,
  516. domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
  517. path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
  518. comment=None, comment_url=None, rest={})
  519. jar.set_cookie(cookie)
  520. return record_size
  521. def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
  522. """
  523. References:
  524. - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
  525. - this data appears to be out of date but the important parts of the database structure is the same
  526. - there are a few bytes here and there which are skipped during parsing
  527. """
  528. if jar is None:
  529. jar = YoutubeDLCookieJar()
  530. page_sizes, body_start = _parse_safari_cookies_header(data, logger)
  531. p = DataParser(data[body_start:], logger)
  532. for page_size in page_sizes:
  533. _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger)
  534. p.skip_to_end('footer')
  535. return jar
  536. class _LinuxDesktopEnvironment(Enum):
  537. """
  538. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
  539. DesktopEnvironment
  540. """
  541. OTHER = auto()
  542. CINNAMON = auto()
  543. GNOME = auto()
  544. KDE = auto()
  545. PANTHEON = auto()
  546. UNITY = auto()
  547. XFCE = auto()
  548. class _LinuxKeyring(Enum):
  549. """
  550. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
  551. SelectedLinuxBackend
  552. """
  553. KWALLET = auto()
  554. GNOMEKEYRING = auto()
  555. BASICTEXT = auto()
  556. SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
  557. def _get_linux_desktop_environment(env):
  558. """
  559. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
  560. GetDesktopEnvironment
  561. """
  562. xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
  563. desktop_session = env.get('DESKTOP_SESSION', None)
  564. if xdg_current_desktop is not None:
  565. xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
  566. if xdg_current_desktop == 'Unity':
  567. if desktop_session is not None and 'gnome-fallback' in desktop_session:
  568. return _LinuxDesktopEnvironment.GNOME
  569. else:
  570. return _LinuxDesktopEnvironment.UNITY
  571. elif xdg_current_desktop == 'GNOME':
  572. return _LinuxDesktopEnvironment.GNOME
  573. elif xdg_current_desktop == 'X-Cinnamon':
  574. return _LinuxDesktopEnvironment.CINNAMON
  575. elif xdg_current_desktop == 'KDE':
  576. return _LinuxDesktopEnvironment.KDE
  577. elif xdg_current_desktop == 'Pantheon':
  578. return _LinuxDesktopEnvironment.PANTHEON
  579. elif xdg_current_desktop == 'XFCE':
  580. return _LinuxDesktopEnvironment.XFCE
  581. elif desktop_session is not None:
  582. if desktop_session in ('mate', 'gnome'):
  583. return _LinuxDesktopEnvironment.GNOME
  584. elif 'kde' in desktop_session:
  585. return _LinuxDesktopEnvironment.KDE
  586. elif 'xfce' in desktop_session:
  587. return _LinuxDesktopEnvironment.XFCE
  588. else:
  589. if 'GNOME_DESKTOP_SESSION_ID' in env:
  590. return _LinuxDesktopEnvironment.GNOME
  591. elif 'KDE_FULL_SESSION' in env:
  592. return _LinuxDesktopEnvironment.KDE
  593. return _LinuxDesktopEnvironment.OTHER
  594. def _choose_linux_keyring(logger):
  595. """
  596. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
  597. SelectBackend
  598. """
  599. desktop_environment = _get_linux_desktop_environment(os.environ)
  600. logger.debug(f'detected desktop environment: {desktop_environment.name}')
  601. if desktop_environment == _LinuxDesktopEnvironment.KDE:
  602. linux_keyring = _LinuxKeyring.KWALLET
  603. elif desktop_environment == _LinuxDesktopEnvironment.OTHER:
  604. linux_keyring = _LinuxKeyring.BASICTEXT
  605. else:
  606. linux_keyring = _LinuxKeyring.GNOMEKEYRING
  607. return linux_keyring
  608. def _get_kwallet_network_wallet(logger):
  609. """ The name of the wallet used to store network passwords.
  610. https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
  611. KWalletDBus::NetworkWallet
  612. which does a dbus call to the following function:
  613. https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
  614. Wallet::NetworkWallet
  615. """
  616. default_wallet = 'kdewallet'
  617. try:
  618. stdout, _, returncode = Popen.run([
  619. 'dbus-send', '--session', '--print-reply=literal',
  620. '--dest=org.kde.kwalletd5',
  621. '/modules/kwalletd5',
  622. 'org.kde.KWallet.networkWallet'
  623. ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
  624. if returncode:
  625. logger.warning('failed to read NetworkWallet')
  626. return default_wallet
  627. else:
  628. logger.debug(f'NetworkWallet = "{stdout.strip()}"')
  629. return stdout.strip()
  630. except Exception as e:
  631. logger.warning(f'exception while obtaining NetworkWallet: {e}')
  632. return default_wallet
  633. def _get_kwallet_password(browser_keyring_name, logger):
  634. logger.debug('using kwallet-query to obtain password from kwallet')
  635. if shutil.which('kwallet-query') is None:
  636. logger.error('kwallet-query command not found. KWallet and kwallet-query '
  637. 'must be installed to read from KWallet. kwallet-query should be'
  638. 'included in the kwallet package for your distribution')
  639. return b''
  640. network_wallet = _get_kwallet_network_wallet(logger)
  641. try:
  642. stdout, _, returncode = Popen.run([
  643. 'kwallet-query',
  644. '--read-password', f'{browser_keyring_name} Safe Storage',
  645. '--folder', f'{browser_keyring_name} Keys',
  646. network_wallet
  647. ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
  648. if returncode:
  649. logger.error(f'kwallet-query failed with return code {returncode}. '
  650. 'Please consult the kwallet-query man page for details')
  651. return b''
  652. else:
  653. if stdout.lower().startswith(b'failed to read'):
  654. logger.debug('failed to read password from kwallet. Using empty string instead')
  655. # this sometimes occurs in KDE because chrome does not check hasEntry and instead
  656. # just tries to read the value (which kwallet returns "") whereas kwallet-query
  657. # checks hasEntry. To verify this:
  658. # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
  659. # while starting chrome.
  660. # this may be a bug as the intended behaviour is to generate a random password and store
  661. # it, but that doesn't matter here.
  662. return b''
  663. else:
  664. logger.debug('password found')
  665. return stdout.rstrip(b'\n')
  666. except Exception as e:
  667. logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
  668. return b''
  669. def _get_gnome_keyring_password(browser_keyring_name, logger):
  670. if not secretstorage:
  671. logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
  672. return b''
  673. # the Gnome keyring does not seem to organise keys in the same way as KWallet,
  674. # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
  675. # and presumably searches for its key in the list. It appears that we must do the same.
  676. # https://github.com/jaraco/keyring/issues/556
  677. with contextlib.closing(secretstorage.dbus_init()) as con:
  678. col = secretstorage.get_default_collection(con)
  679. for item in col.get_all_items():
  680. if item.get_label() == f'{browser_keyring_name} Safe Storage':
  681. return item.get_secret()
  682. else:
  683. logger.error('failed to read from keyring')
  684. return b''
  685. def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
  686. # note: chrome/chromium can be run with the following flags to determine which keyring backend
  687. # it has chosen to use
  688. # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
  689. # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
  690. # will not be sufficient in all cases.
  691. keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
  692. logger.debug(f'Chosen keyring: {keyring.name}')
  693. if keyring == _LinuxKeyring.KWALLET:
  694. return _get_kwallet_password(browser_keyring_name, logger)
  695. elif keyring == _LinuxKeyring.GNOMEKEYRING:
  696. return _get_gnome_keyring_password(browser_keyring_name, logger)
  697. elif keyring == _LinuxKeyring.BASICTEXT:
  698. # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
  699. return None
  700. assert False, f'Unknown keyring {keyring}'
  701. def _get_mac_keyring_password(browser_keyring_name, logger):
  702. logger.debug('using find-generic-password to obtain password from OSX keychain')
  703. try:
  704. stdout, _, returncode = Popen.run(
  705. ['security', 'find-generic-password',
  706. '-w', # write password to stdout
  707. '-a', browser_keyring_name, # match 'account'
  708. '-s', f'{browser_keyring_name} Safe Storage'], # match 'service'
  709. stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
  710. if returncode:
  711. logger.warning('find-generic-password failed')
  712. return None
  713. return stdout.rstrip(b'\n')
  714. except Exception as e:
  715. logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
  716. return None
  717. def _get_windows_v10_key(browser_root, logger):
  718. path = _find_most_recently_used_file(browser_root, 'Local State', logger)
  719. if path is None:
  720. logger.error('could not find local state file')
  721. return None
  722. logger.debug(f'Found local state file at "{path}"')
  723. with open(path, encoding='utf8') as f:
  724. data = json.load(f)
  725. try:
  726. base64_key = data['os_crypt']['encrypted_key']
  727. except KeyError:
  728. logger.error('no encrypted key in Local State')
  729. return None
  730. encrypted_key = base64.b64decode(base64_key)
  731. prefix = b'DPAPI'
  732. if not encrypted_key.startswith(prefix):
  733. logger.error('invalid key')
  734. return None
  735. return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)
  736. def pbkdf2_sha1(password, salt, iterations, key_length):
  737. return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
  738. def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
  739. plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
  740. try:
  741. return plaintext.decode()
  742. except UnicodeDecodeError:
  743. logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
  744. return None
  745. def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
  746. try:
  747. plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
  748. except ValueError:
  749. logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
  750. return None
  751. try:
  752. return plaintext.decode()
  753. except UnicodeDecodeError:
  754. logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
  755. return None
  756. def _decrypt_windows_dpapi(ciphertext, logger):
  757. """
  758. References:
  759. - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
  760. """
  761. import ctypes
  762. import ctypes.wintypes
  763. class DATA_BLOB(ctypes.Structure):
  764. _fields_ = [('cbData', ctypes.wintypes.DWORD),
  765. ('pbData', ctypes.POINTER(ctypes.c_char))]
  766. buffer = ctypes.create_string_buffer(ciphertext)
  767. blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
  768. blob_out = DATA_BLOB()
  769. ret = ctypes.windll.crypt32.CryptUnprotectData(
  770. ctypes.byref(blob_in), # pDataIn
  771. None, # ppszDataDescr: human readable description of pDataIn
  772. None, # pOptionalEntropy: salt?
  773. None, # pvReserved: must be NULL
  774. None, # pPromptStruct: information about prompts to display
  775. 0, # dwFlags
  776. ctypes.byref(blob_out) # pDataOut
  777. )
  778. if not ret:
  779. logger.warning('failed to decrypt with DPAPI', only_once=True)
  780. return None
  781. result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
  782. ctypes.windll.kernel32.LocalFree(blob_out.pbData)
  783. return result
  784. def _config_home():
  785. return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
  786. def _open_database_copy(database_path, tmpdir):
  787. # cannot open sqlite databases if they are already in use (e.g. by the browser)
  788. database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
  789. shutil.copy(database_path, database_copy_path)
  790. conn = sqlite3.connect(database_copy_path)
  791. return conn.cursor()
  792. def _get_column_names(cursor, table_name):
  793. table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
  794. return [row[1].decode() for row in table_info]
  795. def _find_most_recently_used_file(root, filename, logger):
  796. # if there are multiple browser profiles, take the most recently used one
  797. i, paths = 0, []
  798. with _create_progress_bar(logger) as progress_bar:
  799. for curr_root, dirs, files in os.walk(root):
  800. for file in files:
  801. i += 1
  802. progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
  803. if file == filename:
  804. paths.append(os.path.join(curr_root, file))
  805. return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
  806. def _merge_cookie_jars(jars):
  807. output_jar = YoutubeDLCookieJar()
  808. for jar in jars:
  809. for cookie in jar:
  810. output_jar.set_cookie(cookie)
  811. if jar.filename is not None:
  812. output_jar.filename = jar.filename
  813. return output_jar
  814. def _is_path(value):
  815. return os.path.sep in value
  816. def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
  817. if browser_name not in SUPPORTED_BROWSERS:
  818. raise ValueError(f'unsupported browser: "{browser_name}"')
  819. if keyring not in (None, *SUPPORTED_KEYRINGS):
  820. raise ValueError(f'unsupported keyring: "{keyring}"')
  821. if profile is not None and _is_path(expand_path(profile)):
  822. profile = expand_path(profile)
  823. return browser_name, profile, keyring, container
  824. class LenientSimpleCookie(http.cookies.SimpleCookie):
  825. """More lenient version of http.cookies.SimpleCookie"""
  826. # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
  827. # We use Morsel's legal key chars to avoid errors on setting values
  828. _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
  829. _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
  830. _RESERVED = {
  831. "expires",
  832. "path",
  833. "comment",
  834. "domain",
  835. "max-age",
  836. "secure",
  837. "httponly",
  838. "version",
  839. "samesite",
  840. }
  841. _FLAGS = {"secure", "httponly"}
  842. # Added 'bad' group to catch the remaining value
  843. _COOKIE_PATTERN = re.compile(r"""
  844. \s* # Optional whitespace at start of cookie
  845. (?P<key> # Start of group 'key'
  846. [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
  847. ) # End of group 'key'
  848. ( # Optional group: there may not be a value.
  849. \s*=\s* # Equal Sign
  850. ( # Start of potential value
  851. (?P<val> # Start of group 'val'
  852. "(?:[^\\"]|\\.)*" # Any doublequoted string
  853. | # or
  854. \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
  855. | # or
  856. [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
  857. ) # End of group 'val'
  858. | # or
  859. (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
  860. ) # End of potential value
  861. )? # End of optional value group
  862. \s* # Any number of spaces.
  863. (\s+|;|$) # Ending either at space, semicolon, or EOS.
  864. """, re.ASCII | re.VERBOSE)
  865. def load(self, data):
  866. # Workaround for https://github.com/hypervideo/hypervideo/issues/4776
  867. if not isinstance(data, str):
  868. return super().load(data)
  869. morsel = None
  870. for match in self._COOKIE_PATTERN.finditer(data):
  871. if match.group('bad'):
  872. morsel = None
  873. continue
  874. key, value = match.group('key', 'val')
  875. is_attribute = False
  876. if key.startswith('$'):
  877. key = key[1:]
  878. is_attribute = True
  879. lower_key = key.lower()
  880. if lower_key in self._RESERVED:
  881. if morsel is None:
  882. continue
  883. if value is None:
  884. if lower_key not in self._FLAGS:
  885. morsel = None
  886. continue
  887. value = True
  888. else:
  889. value, _ = self.value_decode(value)
  890. morsel[key] = value
  891. elif is_attribute:
  892. morsel = None
  893. elif value is not None:
  894. morsel = self.get(key, http.cookies.Morsel())
  895. real_value, coded_value = self.value_decode(value)
  896. morsel.set(key, real_value, coded_value)
  897. self[key] = morsel
  898. else:
  899. morsel = None