external.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. import enum
  2. import os.path
  3. import re
  4. import subprocess
  5. import sys
  6. import time
  7. from .fragment import FragmentFD
  8. from ..compat import functools
  9. from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
  10. from ..utils import (
  11. Popen,
  12. RetryManager,
  13. _configuration_args,
  14. check_executable,
  15. classproperty,
  16. cli_bool_option,
  17. cli_option,
  18. cli_valueless_option,
  19. determine_ext,
  20. encodeArgument,
  21. encodeFilename,
  22. handle_youtubedl_headers,
  23. remove_end,
  24. traverse_obj,
  25. )
  26. class Features(enum.Enum):
  27. TO_STDOUT = enum.auto()
  28. MULTIPLE_FORMATS = enum.auto()
  29. class ExternalFD(FragmentFD):
  30. SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
  31. SUPPORTED_FEATURES = ()
  32. _CAPTURE_STDERR = True
  33. def real_download(self, filename, info_dict):
  34. self.report_destination(filename)
  35. tmpfilename = self.temp_name(filename)
  36. try:
  37. started = time.time()
  38. retval = self._call_downloader(tmpfilename, info_dict)
  39. except KeyboardInterrupt:
  40. if not info_dict.get('is_live'):
  41. raise
  42. # Live stream downloading cancellation should be considered as
  43. # correct and expected termination thus all postprocessing
  44. # should take place
  45. retval = 0
  46. self.to_screen('[%s] Interrupted by user' % self.get_basename())
  47. if retval == 0:
  48. status = {
  49. 'filename': filename,
  50. 'status': 'finished',
  51. 'elapsed': time.time() - started,
  52. }
  53. if filename != '-':
  54. fsize = os.path.getsize(encodeFilename(tmpfilename))
  55. self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes')
  56. self.try_rename(tmpfilename, filename)
  57. status.update({
  58. 'downloaded_bytes': fsize,
  59. 'total_bytes': fsize,
  60. })
  61. self._hook_progress(status, info_dict)
  62. return True
  63. else:
  64. self.to_stderr('\n')
  65. self.report_error('%s exited with code %d' % (
  66. self.get_basename(), retval))
  67. return False
  68. @classmethod
  69. def get_basename(cls):
  70. return cls.__name__[:-2].lower()
  71. @classproperty
  72. def EXE_NAME(cls):
  73. return cls.get_basename()
  74. @functools.cached_property
  75. def exe(self):
  76. return self.EXE_NAME
  77. @classmethod
  78. def available(cls, path=None):
  79. path = check_executable(
  80. cls.EXE_NAME if path in (None, cls.get_basename()) else path,
  81. [cls.AVAILABLE_OPT])
  82. if not path:
  83. return False
  84. cls.exe = path
  85. return path
  86. @classmethod
  87. def supports(cls, info_dict):
  88. return all((
  89. not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
  90. '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
  91. all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
  92. ))
  93. @classmethod
  94. def can_download(cls, info_dict, path=None):
  95. return cls.available(path) and cls.supports(info_dict)
  96. def _option(self, command_option, param):
  97. return cli_option(self.params, command_option, param)
  98. def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
  99. return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
  100. def _valueless_option(self, command_option, param, expected_value=True):
  101. return cli_valueless_option(self.params, command_option, param, expected_value)
  102. def _configuration_args(self, keys=None, *args, **kwargs):
  103. return _configuration_args(
  104. self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
  105. keys, *args, **kwargs)
  106. def _call_downloader(self, tmpfilename, info_dict):
  107. """ Either overwrite this or implement _make_cmd """
  108. cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
  109. self._debug_cmd(cmd)
  110. if 'fragments' not in info_dict:
  111. _, stderr, returncode = Popen.run(
  112. cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
  113. if returncode and stderr:
  114. self.to_stderr(stderr)
  115. return returncode
  116. skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
  117. retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
  118. frag_index=None, fatal=not skip_unavailable_fragments)
  119. for retry in retry_manager:
  120. _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE)
  121. if not returncode:
  122. break
  123. # TODO: Decide whether to retry based on error code
  124. # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
  125. if stderr:
  126. self.to_stderr(stderr)
  127. retry.error = Exception()
  128. continue
  129. if not skip_unavailable_fragments and retry_manager.error:
  130. return -1
  131. decrypt_fragment = self.decrypter(info_dict)
  132. dest, _ = self.sanitize_open(tmpfilename, 'wb')
  133. for frag_index, fragment in enumerate(info_dict['fragments']):
  134. fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
  135. try:
  136. src, _ = self.sanitize_open(fragment_filename, 'rb')
  137. except OSError as err:
  138. if skip_unavailable_fragments and frag_index > 1:
  139. self.report_skip_fragment(frag_index, err)
  140. continue
  141. self.report_error(f'Unable to open fragment {frag_index}; {err}')
  142. return -1
  143. dest.write(decrypt_fragment(fragment, src.read()))
  144. src.close()
  145. if not self.params.get('keep_fragments', False):
  146. self.try_remove(encodeFilename(fragment_filename))
  147. dest.close()
  148. self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
  149. return 0
  150. class CurlFD(ExternalFD):
  151. AVAILABLE_OPT = '-V'
  152. _CAPTURE_STDERR = False # curl writes the progress to stderr
  153. def _make_cmd(self, tmpfilename, info_dict):
  154. cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
  155. if info_dict.get('http_headers') is not None:
  156. for key, val in info_dict['http_headers'].items():
  157. cmd += ['--header', f'{key}: {val}']
  158. cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
  159. cmd += self._valueless_option('--silent', 'noprogress')
  160. cmd += self._valueless_option('--verbose', 'verbose')
  161. cmd += self._option('--limit-rate', 'ratelimit')
  162. retry = self._option('--retry', 'retries')
  163. if len(retry) == 2:
  164. if retry[1] in ('inf', 'infinite'):
  165. retry[1] = '2147483647'
  166. cmd += retry
  167. cmd += self._option('--max-filesize', 'max_filesize')
  168. cmd += self._option('--interface', 'source_address')
  169. cmd += self._option('--proxy', 'proxy')
  170. cmd += self._valueless_option('--insecure', 'nocheckcertificate')
  171. cmd += self._configuration_args()
  172. cmd += ['--', info_dict['url']]
  173. return cmd
  174. class AxelFD(ExternalFD):
  175. AVAILABLE_OPT = '-V'
  176. def _make_cmd(self, tmpfilename, info_dict):
  177. cmd = [self.exe, '-o', tmpfilename]
  178. if info_dict.get('http_headers') is not None:
  179. for key, val in info_dict['http_headers'].items():
  180. cmd += ['-H', f'{key}: {val}']
  181. cmd += self._configuration_args()
  182. cmd += ['--', info_dict['url']]
  183. return cmd
  184. class WgetFD(ExternalFD):
  185. AVAILABLE_OPT = '--version'
  186. def _make_cmd(self, tmpfilename, info_dict):
  187. cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
  188. if info_dict.get('http_headers') is not None:
  189. for key, val in info_dict['http_headers'].items():
  190. cmd += ['--header', f'{key}: {val}']
  191. cmd += self._option('--limit-rate', 'ratelimit')
  192. retry = self._option('--tries', 'retries')
  193. if len(retry) == 2:
  194. if retry[1] in ('inf', 'infinite'):
  195. retry[1] = '0'
  196. cmd += retry
  197. cmd += self._option('--bind-address', 'source_address')
  198. proxy = self.params.get('proxy')
  199. if proxy:
  200. for var in ('http_proxy', 'https_proxy'):
  201. cmd += ['--execute', f'{var}={proxy}']
  202. cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
  203. cmd += self._configuration_args()
  204. cmd += ['--', info_dict['url']]
  205. return cmd
  206. class Aria2cFD(ExternalFD):
  207. AVAILABLE_OPT = '-v'
  208. SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
  209. @staticmethod
  210. def supports_manifest(manifest):
  211. UNSUPPORTED_FEATURES = [
  212. r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1]
  213. # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
  214. ]
  215. check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
  216. return all(check_results)
  217. @staticmethod
  218. def _aria2c_filename(fn):
  219. return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
  220. def _make_cmd(self, tmpfilename, info_dict):
  221. cmd = [self.exe, '-c',
  222. '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
  223. '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
  224. if 'fragments' in info_dict:
  225. cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
  226. else:
  227. cmd += ['--min-split-size', '1M']
  228. if info_dict.get('http_headers') is not None:
  229. for key, val in info_dict['http_headers'].items():
  230. cmd += ['--header', f'{key}: {val}']
  231. cmd += self._option('--max-overall-download-limit', 'ratelimit')
  232. cmd += self._option('--interface', 'source_address')
  233. cmd += self._option('--all-proxy', 'proxy')
  234. cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
  235. cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
  236. cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
  237. cmd += self._configuration_args()
  238. # aria2c strips out spaces from the beginning/end of filenames and paths.
  239. # We work around this issue by adding a "./" to the beginning of the
  240. # filename and relative path, and adding a "/" at the end of the path.
  241. # See: https://github.com/hypervideo/hypervideo/issues/276
  242. # https://github.com/ytdl-org/youtube-dl/issues/20312
  243. # https://github.com/aria2/aria2/issues/1373
  244. dn = os.path.dirname(tmpfilename)
  245. if dn:
  246. cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
  247. if 'fragments' not in info_dict:
  248. cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
  249. cmd += ['--auto-file-renaming=false']
  250. if 'fragments' in info_dict:
  251. cmd += ['--file-allocation=none', '--uri-selector=inorder']
  252. url_list_file = '%s.frag.urls' % tmpfilename
  253. url_list = []
  254. for frag_index, fragment in enumerate(info_dict['fragments']):
  255. fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
  256. url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
  257. stream, _ = self.sanitize_open(url_list_file, 'wb')
  258. stream.write('\n'.join(url_list).encode())
  259. stream.close()
  260. cmd += ['-i', self._aria2c_filename(url_list_file)]
  261. else:
  262. cmd += ['--', info_dict['url']]
  263. return cmd
  264. class HttpieFD(ExternalFD):
  265. AVAILABLE_OPT = '--version'
  266. EXE_NAME = 'http'
  267. def _make_cmd(self, tmpfilename, info_dict):
  268. cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
  269. if info_dict.get('http_headers') is not None:
  270. for key, val in info_dict['http_headers'].items():
  271. cmd += [f'{key}:{val}']
  272. return cmd
  273. class FFmpegFD(ExternalFD):
  274. SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
  275. SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS)
  276. @classmethod
  277. def available(cls, path=None):
  278. # TODO: Fix path for ffmpeg
  279. # Fixme: This may be wrong when --ffmpeg-location is used
  280. return FFmpegPostProcessor().available
  281. def on_process_started(self, proc, stdin):
  282. """ Override this in subclasses """
  283. pass
  284. @classmethod
  285. def can_merge_formats(cls, info_dict, params):
  286. return (
  287. info_dict.get('requested_formats')
  288. and info_dict.get('protocol')
  289. and not params.get('allow_unplayable_formats')
  290. and 'no-direct-merge' not in params.get('compat_opts', [])
  291. and cls.can_download(info_dict))
  292. def _call_downloader(self, tmpfilename, info_dict):
  293. urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']]
  294. ffpp = FFmpegPostProcessor(downloader=self)
  295. if not ffpp.available:
  296. self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
  297. return False
  298. ffpp.check_version()
  299. args = [ffpp.executable, '-y']
  300. for log_level in ('quiet', 'verbose'):
  301. if self.params.get(log_level, False):
  302. args += ['-loglevel', log_level]
  303. break
  304. if not self.params.get('verbose'):
  305. args += ['-hide_banner']
  306. args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
  307. # These exists only for compatibility. Extractors should use
  308. # info_dict['downloader_options']['ffmpeg_args'] instead
  309. args += info_dict.get('_ffmpeg_args') or []
  310. seekable = info_dict.get('_seekable')
  311. if seekable is not None:
  312. # setting -seekable prevents ffmpeg from guessing if the server
  313. # supports seeking(by adding the header `Range: bytes=0-`), which
  314. # can cause problems in some cases
  315. # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
  316. # http://trac.ffmpeg.org/ticket/6125#comment:10
  317. args += ['-seekable', '1' if seekable else '0']
  318. http_headers = None
  319. if info_dict.get('http_headers'):
  320. youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers'])
  321. http_headers = [
  322. # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
  323. # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
  324. '-headers',
  325. ''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items())
  326. ]
  327. env = None
  328. proxy = self.params.get('proxy')
  329. if proxy:
  330. if not re.match(r'^[\da-zA-Z]+://', proxy):
  331. proxy = 'http://%s' % proxy
  332. if proxy.startswith('socks'):
  333. self.report_warning(
  334. '%s does not support SOCKS proxies. Downloading is likely to fail. '
  335. 'Consider adding --hls-prefer-native to your command.' % self.get_basename())
  336. # Since December 2015 ffmpeg supports -http_proxy option (see
  337. # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
  338. # We could switch to the following code if we are able to detect version properly
  339. # args += ['-http_proxy', proxy]
  340. env = os.environ.copy()
  341. env['HTTP_PROXY'] = proxy
  342. env['http_proxy'] = proxy
  343. protocol = info_dict.get('protocol')
  344. if protocol == 'rtmp':
  345. player_url = info_dict.get('player_url')
  346. page_url = info_dict.get('page_url')
  347. app = info_dict.get('app')
  348. play_path = info_dict.get('play_path')
  349. tc_url = info_dict.get('tc_url')
  350. flash_version = info_dict.get('flash_version')
  351. live = info_dict.get('rtmp_live', False)
  352. conn = info_dict.get('rtmp_conn')
  353. if player_url is not None:
  354. args += ['-rtmp_swfverify', player_url]
  355. if page_url is not None:
  356. args += ['-rtmp_pageurl', page_url]
  357. if app is not None:
  358. args += ['-rtmp_app', app]
  359. if play_path is not None:
  360. args += ['-rtmp_playpath', play_path]
  361. if tc_url is not None:
  362. args += ['-rtmp_tcurl', tc_url]
  363. if flash_version is not None:
  364. args += ['-rtmp_flashver', flash_version]
  365. if live:
  366. args += ['-rtmp_live', 'live']
  367. if isinstance(conn, list):
  368. for entry in conn:
  369. args += ['-rtmp_conn', entry]
  370. elif isinstance(conn, str):
  371. args += ['-rtmp_conn', conn]
  372. start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
  373. for i, url in enumerate(urls):
  374. if http_headers is not None and re.match(r'^https?://', url):
  375. args += http_headers
  376. if start_time:
  377. args += ['-ss', str(start_time)]
  378. if end_time:
  379. args += ['-t', str(end_time - start_time)]
  380. args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
  381. if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
  382. args += ['-c', 'copy']
  383. if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
  384. for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
  385. stream_number = fmt.get('manifest_stream_number', 0)
  386. args.extend(['-map', f'{i}:{stream_number}'])
  387. if self.params.get('test', False):
  388. args += ['-fs', str(self._TEST_FILE_SIZE)]
  389. ext = info_dict['ext']
  390. if protocol in ('m3u8', 'm3u8_native'):
  391. use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
  392. if use_mpegts is None:
  393. use_mpegts = info_dict.get('is_live')
  394. if use_mpegts:
  395. args += ['-f', 'mpegts']
  396. else:
  397. args += ['-f', 'mp4']
  398. if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
  399. args += ['-bsf:a', 'aac_adtstoasc']
  400. elif protocol == 'rtmp':
  401. args += ['-f', 'flv']
  402. elif ext == 'mp4' and tmpfilename == '-':
  403. args += ['-f', 'mpegts']
  404. elif ext == 'unknown_video':
  405. ext = determine_ext(remove_end(tmpfilename, '.part'))
  406. if ext == 'unknown_video':
  407. self.report_warning(
  408. 'The video format is unknown and cannot be downloaded by ffmpeg. '
  409. 'Explicitly set the extension in the filename to attempt download in that format')
  410. else:
  411. self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename')
  412. args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
  413. else:
  414. args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
  415. args += self._configuration_args(('_o1', '_o', ''))
  416. args = [encodeArgument(opt) for opt in args]
  417. args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
  418. self._debug_cmd(args)
  419. with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
  420. if url in ('-', 'pipe:'):
  421. self.on_process_started(proc, proc.stdin)
  422. try:
  423. retval = proc.wait()
  424. except BaseException as e:
  425. # subprocces.run would send the SIGKILL signal to ffmpeg and the
  426. # mp4 file couldn't be played, but if we ask ffmpeg to quit it
  427. # produces a file that is playable (this is mostly useful for live
  428. # streams). Note that Windows is not affected and produces playable
  429. # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
  430. if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
  431. proc.communicate_or_kill(b'q')
  432. else:
  433. proc.kill(timeout=None)
  434. raise
  435. return retval
  436. class AVconvFD(FFmpegFD):
  437. pass
# Registry mapping each downloader's basename (see ExternalFD.get_basename) to its class.
# It is built from every module-level name ending in 'FD', except the
# ExternalFD and FragmentFD base classes.
_BY_NAME = {
    klass.get_basename(): klass
    for name, klass in globals().items()
    if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
}
  443. def list_external_downloaders():
  444. return sorted(_BY_NAME.keys())
  445. def get_external_downloader(external_downloader):
  446. """ Given the name of the executable, see whether we support the given downloader """
  447. bn = os.path.splitext(os.path.basename(external_downloader))[0]
  448. return _BY_NAME.get(bn) or next((
  449. klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn
  450. ), None)