yandexdisk.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. import json
  2. from .common import InfoExtractor
  3. from ..utils import (
  4. determine_ext,
  5. float_or_none,
  6. int_or_none,
  7. mimetype2ext,
  8. try_get,
  9. urljoin,
  10. )
  11. class YandexDiskIE(InfoExtractor):
  12. _VALID_URL = r'''(?x)https?://
  13. (?P<domain>
  14. yadi\.sk|
  15. disk\.yandex\.
  16. (?:
  17. az|
  18. by|
  19. co(?:m(?:\.(?:am|ge|tr))?|\.il)|
  20. ee|
  21. fr|
  22. k[gz]|
  23. l[tv]|
  24. md|
  25. t[jm]|
  26. u[az]|
  27. ru
  28. )
  29. )/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
  30. _TESTS = [{
  31. 'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
  32. 'md5': 'a4a8d52958c8fddcf9845935070402ae',
  33. 'info_dict': {
  34. 'id': 'VdOeDou8eZs6Y',
  35. 'ext': 'mp4',
  36. 'title': '4.mp4',
  37. 'duration': 168.6,
  38. 'uploader': 'y.botova',
  39. 'uploader_id': '300043621',
  40. 'view_count': int,
  41. },
  42. 'expected_warnings': ['Unable to download JSON metadata'],
  43. }, {
  44. 'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
  45. 'only_matching': True,
  46. }, {
  47. 'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
  48. 'only_matching': True,
  49. }]
  50. def _real_extract(self, url):
  51. domain, video_id = self._match_valid_url(url).groups()
  52. webpage = self._download_webpage(url, video_id)
  53. store = self._parse_json(self._search_regex(
  54. r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
  55. webpage, 'store'), video_id)
  56. resource = store['resources'][store['rootResourceId']]
  57. title = resource['name']
  58. meta = resource.get('meta') or {}
  59. public_url = meta.get('short_url')
  60. if public_url:
  61. video_id = self._match_id(public_url)
  62. source_url = (self._download_json(
  63. 'https://cloud-api.yandex.net/v1/disk/public/resources/download',
  64. video_id, query={'public_key': url}, fatal=False) or {}).get('href')
  65. video_streams = resource.get('videoStreams') or {}
  66. video_hash = resource.get('hash') or url
  67. environment = store.get('environment') or {}
  68. sk = environment.get('sk')
  69. yandexuid = environment.get('yandexuid')
  70. if sk and yandexuid and not (source_url and video_streams):
  71. self._set_cookie(domain, 'yandexuid', yandexuid)
  72. def call_api(action):
  73. return (self._download_json(
  74. urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
  75. 'hash': video_hash,
  76. 'sk': sk,
  77. }).encode(), headers={
  78. 'Content-Type': 'text/plain',
  79. }, fatal=False) or {}).get('data') or {}
  80. if not source_url:
  81. # TODO: figure out how to detect if download limit has
  82. # been reached and then avoid unnecessary source format
  83. # extraction requests
  84. source_url = call_api('download-url').get('url')
  85. if not video_streams:
  86. video_streams = call_api('get-video-streams')
  87. formats = []
  88. if source_url:
  89. formats.append({
  90. 'url': source_url,
  91. 'format_id': 'source',
  92. 'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),
  93. 'quality': 1,
  94. 'filesize': int_or_none(meta.get('size'))
  95. })
  96. for video in (video_streams.get('videos') or []):
  97. format_url = video.get('url')
  98. if not format_url:
  99. continue
  100. if video.get('dimension') == 'adaptive':
  101. formats.extend(self._extract_m3u8_formats(
  102. format_url, video_id, 'mp4', 'm3u8_native',
  103. m3u8_id='hls', fatal=False))
  104. else:
  105. size = video.get('size') or {}
  106. height = int_or_none(size.get('height'))
  107. format_id = 'hls'
  108. if height:
  109. format_id += '-%dp' % height
  110. formats.append({
  111. 'ext': 'mp4',
  112. 'format_id': format_id,
  113. 'height': height,
  114. 'protocol': 'm3u8_native',
  115. 'url': format_url,
  116. 'width': int_or_none(size.get('width')),
  117. })
  118. uid = resource.get('uid')
  119. display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
  120. return {
  121. 'id': video_id,
  122. 'title': title,
  123. 'duration': float_or_none(video_streams.get('duration'), 1000),
  124. 'uploader': display_name,
  125. 'uploader_id': uid,
  126. 'view_count': int_or_none(meta.get('views_counter')),
  127. 'formats': formats,
  128. }