# odnoklassniki.py (15 KB)
  1. from .common import InfoExtractor
  2. from ..compat import (
  3. compat_etree_fromstring,
  4. compat_parse_qs,
  5. compat_urllib_parse_unquote,
  6. compat_urllib_parse_urlparse,
  7. )
  8. from ..utils import (
  9. ExtractorError,
  10. float_or_none,
  11. int_or_none,
  12. qualities,
  13. smuggle_url,
  14. unescapeHTML,
  15. unified_strdate,
  16. unsmuggle_url,
  17. urlencode_postdata,
  18. )
  19. class OdnoklassnikiIE(InfoExtractor):
  20. _VALID_URL = r'''(?x)
  21. https?://
  22. (?:(?:www|m|mobile)\.)?
  23. (?:odnoklassniki|ok)\.ru/
  24. (?:
  25. video(?P<embed>embed)?/|
  26. web-api/video/moviePlayer/|
  27. live/|
  28. dk\?.*?st\.mvId=
  29. )
  30. (?P<id>[\d-]+)
  31. '''
  32. _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1']
  33. _TESTS = [{
  34. 'note': 'Coub embedded',
  35. 'url': 'http://ok.ru/video/1484130554189',
  36. 'info_dict': {
  37. 'id': '1keok9',
  38. 'ext': 'mp4',
  39. 'timestamp': 1545580896,
  40. 'view_count': int,
  41. 'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
  42. 'title': 'Народная забава',
  43. 'uploader': 'Nevata',
  44. 'upload_date': '20181223',
  45. 'age_limit': 0,
  46. 'uploader_id': 'nevata.s',
  47. 'like_count': int,
  48. 'duration': 8.08,
  49. 'repost_count': int,
  50. },
  51. }, {
  52. 'note': 'vk.com embedded',
  53. 'url': 'https://ok.ru/video/3568183087575',
  54. 'info_dict': {
  55. 'id': '-165101755_456243749',
  56. 'ext': 'mp4',
  57. 'uploader_id': '-165101755',
  58. 'duration': 132,
  59. 'timestamp': 1642869935,
  60. 'upload_date': '20220122',
  61. 'thumbnail': str,
  62. 'title': str,
  63. 'uploader': str,
  64. },
  65. }, {
  66. # metadata in JSON
  67. 'url': 'http://ok.ru/video/20079905452',
  68. 'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
  69. 'info_dict': {
  70. 'id': '20079905452',
  71. 'ext': 'mp4',
  72. 'title': 'Культура меняет нас (прекрасный ролик!))',
  73. 'thumbnail': str,
  74. 'duration': 100,
  75. 'upload_date': '20141207',
  76. 'uploader_id': '330537914540',
  77. 'uploader': 'Виталий Добровольский',
  78. 'like_count': int,
  79. 'age_limit': 0,
  80. },
  81. }, {
  82. # metadataUrl
  83. 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
  84. 'md5': 'f8c951122516af72e6e6ffdd3c41103b',
  85. 'info_dict': {
  86. 'id': '63567059965189-0',
  87. 'ext': 'mp4',
  88. 'title': 'Девушка без комплексов ...',
  89. 'thumbnail': str,
  90. 'duration': 191,
  91. 'upload_date': '20150518',
  92. 'uploader_id': '534380003155',
  93. 'uploader': '☭ Андрей Мещанинов ☭',
  94. 'like_count': int,
  95. 'age_limit': 0,
  96. 'start_time': 5,
  97. },
  98. }, {
  99. # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
  100. 'url': 'https://ok.ru/video/3952212382174',
  101. 'md5': '91749d0bd20763a28d083fa335bbd37a',
  102. 'info_dict': {
  103. 'id': '5axVgHHDBvU',
  104. 'ext': 'mp4',
  105. 'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
  106. 'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
  107. 'uploader': 'Lod Mer',
  108. 'uploader_id': '575186401502',
  109. 'duration': 1529,
  110. 'age_limit': 0,
  111. 'upload_date': '20210405',
  112. 'comment_count': int,
  113. 'live_status': 'not_live',
  114. 'view_count': int,
  115. 'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
  116. 'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
  117. 'channel_follower_count': int,
  118. 'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
  119. 'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
  120. 'like_count': int,
  121. 'availability': 'public',
  122. 'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
  123. 'categories': ['Education'],
  124. 'playable_in_embed': True,
  125. 'channel': 'BornToReact',
  126. },
  127. }, {
  128. # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
  129. 'url': 'http://ok.ru/video/62036049272859-0',
  130. 'info_dict': {
  131. 'id': '62036049272859-0',
  132. 'ext': 'mp4',
  133. 'title': 'МУЗЫКА ДОЖДЯ .',
  134. 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
  135. 'upload_date': '20120106',
  136. 'uploader_id': '473534735899',
  137. 'uploader': 'МARINA D',
  138. 'age_limit': 0,
  139. },
  140. 'params': {
  141. 'skip_download': True,
  142. },
  143. 'skip': 'Video has not been found',
  144. }, {
  145. # TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
  146. 'note': 'Only available in mobile webpage',
  147. 'url': 'https://m.ok.ru/video/2361249957145',
  148. 'info_dict': {
  149. 'id': '2361249957145',
  150. 'ext': 'mp4',
  151. 'title': 'Быковское крещение',
  152. 'duration': 3038.181,
  153. },
  154. }, {
  155. 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
  156. 'only_matching': True,
  157. }, {
  158. 'url': 'http://www.ok.ru/video/20648036891',
  159. 'only_matching': True,
  160. }, {
  161. 'url': 'http://www.ok.ru/videoembed/20648036891',
  162. 'only_matching': True,
  163. }, {
  164. 'url': 'http://m.ok.ru/video/20079905452',
  165. 'only_matching': True,
  166. }, {
  167. 'url': 'http://mobile.ok.ru/video/20079905452',
  168. 'only_matching': True,
  169. }, {
  170. 'url': 'https://www.ok.ru/live/484531969818',
  171. 'only_matching': True,
  172. }, {
  173. 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
  174. 'only_matching': True,
  175. }, {
  176. # Paid video
  177. 'url': 'https://ok.ru/video/954886983203',
  178. 'only_matching': True,
  179. }, {
  180. 'url': 'https://ok.ru/videoembed/2932705602075',
  181. 'info_dict': {
  182. 'id': '2932705602075',
  183. 'ext': 'mp4',
  184. 'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
  185. 'title': 'Boosty для тебя!',
  186. 'uploader_id': '597811038747',
  187. 'like_count': 0,
  188. 'duration': 35,
  189. },
  190. }]
  191. _WEBPAGE_TESTS = [{
  192. 'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
  193. 'info_dict': {
  194. 'id': '3950343629563',
  195. 'ext': 'mp4',
  196. 'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
  197. 'title': 'Заяц Бусти.mp4',
  198. 'uploader_id': '571368965883',
  199. 'like_count': 0,
  200. 'duration': 10444,
  201. },
  202. }]
  203. @classmethod
  204. def _extract_embed_urls(cls, url, webpage):
  205. for x in super()._extract_embed_urls(url, webpage):
  206. yield smuggle_url(x, {'referrer': url})
  207. def _real_extract(self, url):
  208. try:
  209. return self._extract_desktop(url)
  210. except ExtractorError as e:
  211. try:
  212. return self._extract_mobile(url)
  213. except ExtractorError:
  214. # error message of desktop webpage is in English
  215. raise e
  216. def _extract_desktop(self, url):
  217. start_time = int_or_none(compat_parse_qs(
  218. compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
  219. url, smuggled = unsmuggle_url(url, {})
  220. video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
  221. mode = 'videoembed' if is_embed else 'video'
  222. webpage = self._download_webpage(
  223. f'https://ok.ru/{mode}/{video_id}', video_id,
  224. note='Downloading desktop webpage',
  225. headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})
  226. error = self._search_regex(
  227. r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
  228. webpage, 'error', default=None)
  229. # Direct link from boosty
  230. if (error == 'The author of this video has not been found or is blocked'
  231. and not smuggled.get('referrer') and mode == 'videoembed'):
  232. return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
  233. elif error:
  234. raise ExtractorError(error, expected=True)
  235. player = self._parse_json(
  236. unescapeHTML(self._search_regex(
  237. r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
  238. webpage, 'player', group='player')),
  239. video_id)
  240. # embedded external player
  241. if player.get('isExternalPlayer') and player.get('url'):
  242. return self.url_result(player['url'])
  243. flashvars = player['flashvars']
  244. metadata = flashvars.get('metadata')
  245. if metadata:
  246. metadata = self._parse_json(metadata, video_id)
  247. else:
  248. data = {}
  249. st_location = flashvars.get('location')
  250. if st_location:
  251. data['st.location'] = st_location
  252. metadata = self._download_json(
  253. compat_urllib_parse_unquote(flashvars['metadataUrl']),
  254. video_id, 'Downloading metadata JSON',
  255. data=urlencode_postdata(data))
  256. movie = metadata['movie']
  257. # Some embedded videos may not contain title in movie dict (e.g.
  258. # http://ok.ru/video/62036049272859-0) thus we allow missing title
  259. # here and it's going to be extracted later by an extractor that
  260. # will process the actual embed.
  261. provider = metadata.get('provider')
  262. title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
  263. thumbnail = movie.get('poster')
  264. duration = int_or_none(movie.get('duration'))
  265. author = metadata.get('author', {})
  266. uploader_id = author.get('id')
  267. uploader = author.get('name')
  268. upload_date = unified_strdate(self._html_search_meta(
  269. 'ya:ovs:upload_date', webpage, 'upload date', default=None))
  270. age_limit = None
  271. adult = self._html_search_meta(
  272. 'ya:ovs:adult', webpage, 'age limit', default=None)
  273. if adult:
  274. age_limit = 18 if adult == 'true' else 0
  275. like_count = int_or_none(metadata.get('likeCount'))
  276. info = {
  277. 'id': video_id,
  278. 'title': title,
  279. 'thumbnail': thumbnail,
  280. 'duration': duration,
  281. 'upload_date': upload_date,
  282. 'uploader': uploader,
  283. 'uploader_id': uploader_id,
  284. 'like_count': like_count,
  285. 'age_limit': age_limit,
  286. 'start_time': start_time,
  287. }
  288. # pladform
  289. if provider == 'OPEN_GRAPH':
  290. info.update({
  291. '_type': 'url_transparent',
  292. 'url': movie['contentId'],
  293. })
  294. return info
  295. if provider == 'USER_YOUTUBE':
  296. info.update({
  297. '_type': 'url_transparent',
  298. 'url': movie['contentId'],
  299. })
  300. return info
  301. assert title
  302. if provider == 'LIVE_TV_APP':
  303. info['title'] = title
  304. quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))
  305. formats = [{
  306. 'url': f['url'],
  307. 'ext': 'mp4',
  308. 'format_id': f['name'],
  309. } for f in metadata['videos']]
  310. m3u8_url = metadata.get('hlsManifestUrl')
  311. if m3u8_url:
  312. formats.extend(self._extract_m3u8_formats(
  313. m3u8_url, video_id, 'mp4', 'm3u8_native',
  314. m3u8_id='hls', fatal=False))
  315. dash_manifest = metadata.get('metadataEmbedded')
  316. if dash_manifest:
  317. formats.extend(self._parse_mpd_formats(
  318. compat_etree_fromstring(dash_manifest), 'mpd'))
  319. for fmt in formats:
  320. fmt_type = self._search_regex(
  321. r'\btype[/=](\d)', fmt['url'],
  322. 'format type', default=None)
  323. if fmt_type:
  324. fmt['quality'] = quality(fmt_type)
  325. # Live formats
  326. m3u8_url = metadata.get('hlsMasterPlaylistUrl')
  327. if m3u8_url:
  328. formats.extend(self._extract_m3u8_formats(
  329. m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
  330. rtmp_url = metadata.get('rtmpUrl')
  331. if rtmp_url:
  332. formats.append({
  333. 'url': rtmp_url,
  334. 'format_id': 'rtmp',
  335. 'ext': 'flv',
  336. })
  337. if not formats:
  338. payment_info = metadata.get('paymentInfo')
  339. if payment_info:
  340. self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
  341. info['formats'] = formats
  342. return info
  343. def _extract_mobile(self, url):
  344. video_id = self._match_id(url)
  345. webpage = self._download_webpage(
  346. 'http://m.ok.ru/video/%s' % video_id, video_id,
  347. note='Downloading mobile webpage')
  348. error = self._search_regex(
  349. r'видео</a>\s*<div\s+class="empty">(.+?)</div>',
  350. webpage, 'error', default=None)
  351. if error:
  352. raise ExtractorError(error, expected=True)
  353. json_data = self._search_regex(
  354. r'data-video="(.+?)"', webpage, 'json data')
  355. json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}
  356. return {
  357. 'id': video_id,
  358. 'title': json_data.get('videoName'),
  359. 'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
  360. 'thumbnail': json_data.get('videoPosterSrc'),
  361. 'formats': [{
  362. 'format_id': 'mobile',
  363. 'url': json_data.get('videoSrc'),
  364. 'ext': 'mp4',
  365. }]
  366. }