iqiyi.py

import hashlib
import itertools
import re
import time

from .common import InfoExtractor
from ..compat import (
    compat_str,
    compat_urllib_parse_unquote
)
from .openload import PhantomJSwrapper
from ..utils import (
    clean_html,
    ExtractorError,
    float_or_none,
    format_field,
    get_element_by_id,
    get_element_by_attribute,
    int_or_none,
    js_to_json,
    ohdave_rsa_encrypt,
    parse_age_limit,
    parse_duration,
    parse_iso8601,
    parse_resolution,
    qualities,
    remove_start,
    str_or_none,
    traverse_obj,
    urljoin,
)


def md5_text(text):
    return hashlib.md5(text.encode('utf-8')).hexdigest()


class IqiyiIE(InfoExtractor):
    IE_NAME = 'iqiyi'
    IE_DESC = '爱奇艺'

    _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'

    _NETRC_MACHINE = 'iqiyi'

    _TESTS = [{
        'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
        # MD5 checksum differs on my machine and Travis CI
        'info_dict': {
            'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
            'ext': 'mp4',
            'title': '美国德州空中惊现奇异云团 酷似UFO',
        }
    }, {
        'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
        'md5': 'b7dc800a4004b1b57749d9abae0472da',
        'info_dict': {
            'id': 'e3f585b550a280af23c98b6cb2be19fb',
            'ext': 'mp4',
            # This can be either Simplified Chinese or Traditional Chinese
            'title': r're:^(?:名侦探柯南 国语版：第752集 迫近灰原秘密的黑影 下篇|名偵探柯南 國語版：第752集 迫近灰原秘密的黑影 下篇)$',
        },
        'skip': 'Geo-restricted to China',
    }, {
        'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
        'only_matching': True,
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
        'only_matching': True,
    }, {
        'url': 'http://yule.iqiyi.com/pcb.html',
        'info_dict': {
            'id': '4a0af228fddb55ec96398a364248ed7f',
            'ext': 'mp4',
            'title': '第2017-04-21期 女艺人频遭极端粉丝骚扰',
        },
    }, {
        # VIP-only video. The first 2 parts (6 minutes) are available without login
        # MD5 sums omitted as values are different on Travis CI and my machine
        'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
        'info_dict': {
            'id': 'f3cf468b39dddb30d676f89a91200dc1',
            'ext': 'mp4',
            'title': '泰坦尼克号',
        },
        'skip': 'Geo-restricted to China',
    }, {
        'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
        'info_dict': {
            'id': '202918101',
            'title': '灌篮高手 国语版',
        },
        'playlist_count': 101,
    }, {
        'url': 'http://www.pps.tv/w_19rrbav0ph.html',
        'only_matching': True,
    }]

    _FORMATS_MAP = {
        '96': 1,    # 216p, 240p
        '1': 2,     # 336p, 360p
        '2': 3,     # 480p, 504p
        '21': 4,    # 504p
        '4': 5,     # 720p
        '17': 5,    # 720p
        '5': 6,     # 1072p, 1080p
        '18': 7,    # 1080p
    }
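    # Note: this RSA helper appears to be a leftover from the old login flow
    # (_perform_login below now refuses to log in); nothing else in this file
    # calls it.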
    @staticmethod
    def _rsa_fun(data):
        # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
        N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
        e = 65537

        return ohdave_rsa_encrypt(data, e, N)

    def _perform_login(self):
        raise ExtractorError("iQiyi's non-free authentication algorithm has made login impossible", expected=True)

    def get_raw_data(self, tvid, video_id):
        tm = int(time.time() * 1000)
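        # The tmts endpoint expects a signed query: 'sc' below is the MD5 of
        # the request timestamp in milliseconds, a fixed key and the tvid.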
        key = 'd5fb4bd9d50c4be6948c97edd7254b0e'
        sc = md5_text(compat_str(tm) + key + tvid)
        params = {
            'tvid': tvid,
            'vid': video_id,
            'src': '76f90cbd92f94a2e925d83e8ccd22cb7',
            'sc': sc,
            't': tm,
        }

        return self._download_json(
            'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id),
            video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='),
            query=params, headers=self.geo_verification_headers())

    def _extract_playlist(self, webpage):
        PAGE_SIZE = 50
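        # Album pages embed the first page of episode links directly; further
        # pages come from the avlist endpoint, 50 entries at a time, wrapped in
        # a 'var tvInfoJs=' prefix that is stripped before JSON parsing.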
        links = re.findall(
            r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
            webpage)
        if not links:
            return

        album_id = self._search_regex(
            r'albumId\s*:\s*(\d+),', webpage, 'album ID')
        album_title = self._search_regex(
            r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)

        entries = list(map(self.url_result, links))

        # Start from 2 because links in the first page are already on the webpage
        for page_num in itertools.count(2):
            pagelist_page = self._download_webpage(
                'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE),
                album_id,
                note='Download playlist page %d' % page_num,
                errnote='Failed to download playlist page %d' % page_num)
            pagelist = self._parse_json(
                remove_start(pagelist_page, 'var tvInfoJs='), album_id)
            vlist = pagelist['data']['vlist']
            for item in vlist:
                entries.append(self.url_result(item['vurl']))
            if len(vlist) < PAGE_SIZE:
                break

        return self.playlist_result(entries, album_id, album_title)

    def _real_extract(self, url):
        webpage = self._download_webpage(
            url, 'temp_id', note='download video page')

        # There's no simple way to determine whether a URL is a playlist or not
        # Sometimes there are playlist links in individual videos, so treat it
        # as a single video first
        tvid = self._search_regex(
            r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None)
        if tvid is None:
            playlist_result = self._extract_playlist(webpage)
            if playlist_result:
                return playlist_result
            raise ExtractorError('Can\'t find any video')

        video_id = self._search_regex(
            r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')

        formats = []
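        # get_raw_data() sometimes returns no usable streams; the loop below
        # retries up to five times, sleeping 5 seconds between attempts.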
        for _ in range(5):
            raw_data = self.get_raw_data(tvid, video_id)

            if raw_data['code'] != 'A00000':
                if raw_data['code'] == 'A00111':
                    self.raise_geo_restricted()
                raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])

            data = raw_data['data']

            for stream in data['vidl']:
                if 'm3utx' not in stream:
                    continue
                vd = compat_str(stream['vd'])
                formats.append({
                    'url': stream['m3utx'],
                    'format_id': vd,
                    'ext': 'mp4',
                    'quality': self._FORMATS_MAP.get(vd, -1),
                    'protocol': 'm3u8_native',
                })

            if formats:
                break

            self._sleep(5, video_id)

        title = (get_element_by_id('widget-videotitle', webpage)
                 or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
                 or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }


class IqIE(InfoExtractor):
    IE_NAME = 'iq.com'
    IE_DESC = 'International version of iQiyi'
    _VALID_URL = r'https?://(?:www\.)?iq\.com/play/(?:[\w%-]*-)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.iq.com/play/one-piece-episode-1000-1ma1i6ferf4',
        'md5': '2d7caf6eeca8a32b407094b33b757d39',
        'info_dict': {
            'ext': 'mp4',
            'id': '1ma1i6ferf4',
            'title': '航海王 第1000集',
            'description': 'Subtitle available on Sunday 4PM(GMT+8).',
            'duration': 1430,
            'timestamp': 1637488203,
            'upload_date': '20211121',
            'episode_number': 1000,
            'episode': 'Episode 1000',
            'series': 'One Piece',
            'age_limit': 13,
            'average_rating': float,
        },
        'params': {
            'format': '500',
        },
        'expected_warnings': ['format is restricted']
    }, {
        # VIP-restricted video
        'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4',
        'only_matching': True
    }]
    _BID_TAGS = {
        '100': '240P',
        '200': '360P',
        '300': '480P',
        '500': '720P',
        '600': '1080P',
        '610': '1080P50',
        '700': '2K',
        '800': '4K',
    }
    _LID_TAGS = {
        '1': 'zh_CN',
        '2': 'zh_TW',
        '3': 'en',
        '4': 'kor',
        '18': 'th',
        '21': 'my',
        '23': 'vi',
        '24': 'id',
        '26': 'es',
        '28': 'ar',
    }
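    # PhantomJS snippet run against a blank page. For every bid it assembles
    # the query string of a /dash request and signs it with the site's own
    # obfuscated cmd5x() function (substituted in via %(cmd5x_func)s, taken
    # from the player JS), appending the result as the 'vf' parameter.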
    _DASH_JS = '''
        console.log(page.evaluate(function() {
            var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s";
            var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s";
            var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime();
            var cmd5x_func = %(cmd5x_func)s; var cmd5x_exporter = {}; cmd5x_func({}, cmd5x_exporter, {}); var cmd5x = cmd5x_exporter.cmd5x;
            var authKey = cmd5x(cmd5x('') + tm + '' + tvid);
            var k_uid = Array.apply(null, Array(32)).map(function() {return Math.floor(Math.random() * 15).toString(16)}).join('');
            var dash_paths = {};
            bid_list.forEach(function(bid) {
                var query = {
                    'tvid': tvid,
                    'bid': bid,
                    'ds': 1,
                    'vid': vid,
                    'src': src,
                    'vt': 0,
                    'rs': 1,
                    'uid': uid,
                    'ori': 'pcw',
                    'ps': 1,
                    'k_uid': k_uid,
                    'pt': 0,
                    'd': 0,
                    's': '',
                    'lid': '',
                    'slid': 0,
                    'cf': '',
                    'ct': '',
                    'authKey': authKey,
                    'k_tag': 1,
                    'ost': 0,
                    'ppt': 0,
                    'dfp': dfp,
                    'prio': JSON.stringify({
                        'ff': 'f4v',
                        'code': 2
                    }),
                    'k_err_retries': 0,
                    'up': '',
                    'su': 2,
                    'applang': lang,
                    'sver': 2,
                    'X-USER-MODE': mode,
                    'qd_v': 2,
                    'tm': tm,
                    'qdy': 'a',
                    'qds': 0,
                    'k_ft1': 141287244169348,
                    'k_ft4': 34359746564,
                    'k_ft5': 1,
                    'bop': JSON.stringify({
                        'version': '10.0',
                        'dfp': dfp
                    }),
                };
                var enc_params = [];
                for (var prop in query) {
                    enc_params.push(encodeURIComponent(prop) + '=' + encodeURIComponent(query[prop]));
                }
                ut_list.forEach(function(ut) {
                    enc_params.push('ut=' + ut);
                })
                var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path);
                dash_paths[bid] = dash_path;
            });
            return JSON.stringify(dash_paths);
        }));
        saveAndExit();
    '''
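    # The cmd5x signing routine lives in one of the site's webpack chunks. The
    # helper below walks the chunk name/hash maps found in webpack-*.js, looks
    # for the module containing 'vms request', and caches it so later runs can
    # skip the download.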
    def _extract_vms_player_js(self, webpage, video_id):
        player_js_cache = self.cache.load('iq', 'player_js')
        if player_js_cache:
            return player_js_cache
        webpack_js_url = self._proto_relative_url(self._search_regex(
            r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
        webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')

        webpack_map1, webpack_map2 = [self._parse_json(js_map, video_id, transform_source=js_to_json) for js_map in self._search_regex(
            r'\(({[^}]*})\[\w+\][^\)]*\)\s*\+\s*["\']\.["\']\s*\+\s*({[^}]*})\[\w+\]\+["\']\.js', webpack_js, 'JS locations', group=(1, 2))]

        for module_index in reversed(list(webpack_map2.keys())):
            module_js = self._download_webpage(
                f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js',
                video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
            if 'vms request' in module_js:
                self.cache.store('iq', 'player_js', module_js)
                return module_js
        raise ExtractorError('Unable to extract player JS')

    def _extract_cmd5x_function(self, webpage, video_id):
        return self._search_regex(r',\s*(function\s*\([^\)]*\)\s*{\s*var _qda.+_qdc\(\)\s*})\s*,',
                                  self._extract_vms_player_js(webpage, video_id), 'signature function')
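    # The player JS also carries the current bid -> quality-label table; when
    # it can be parsed, it overrides the hardcoded _BID_TAGS defaults above.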
    def _update_bid_tags(self, webpage, video_id):
        extracted_bid_tags = self._parse_json(
            self._search_regex(
                r'arguments\[1\][^,]*,\s*function\s*\([^\)]*\)\s*{\s*"use strict";?\s*var \w=({.+}})\s*,\s*\w\s*=\s*{\s*getNewVd',
                self._extract_vms_player_js(webpage, video_id), 'video tags', default=''),
            video_id, transform_source=js_to_json, fatal=False)
        if not extracted_bid_tags:
            return
        self._BID_TAGS = {
            bid: traverse_obj(extracted_bid_tags, (bid, 'value'), expected_type=str, default=self._BID_TAGS.get(bid))
            for bid in extracted_bid_tags.keys()
        }

    def _get_cookie(self, name, default=None):
        cookie = self._get_cookies('https://iq.com/').get(name)
        return cookie.value if cookie else default

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        self._update_bid_tags(webpage, video_id)

        next_props = self._search_nextjs_data(webpage, video_id)['props']
        page_data = next_props['initialState']['play']
        video_info = page_data['curVideoInfo']
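        # The numeric user id (when logged in) sits in the URL-encoded JSON
        # cookie 'I00002'; if present, the account's VIP tier ids are fetched
        # so they can be passed along as 'ut' parameters in the dash requests.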
        uid = traverse_obj(
            self._parse_json(
                self._get_cookie('I00002', '{}'), video_id, transform_source=compat_urllib_parse_unquote, fatal=False),
            ('data', 'uid'), default=0)

        if uid:
            vip_data = self._download_json(
                'https://pcw-api.iq.com/api/vtype', video_id, note='Downloading VIP data', errnote='Unable to download VIP data', query={
                    'batch': 1,
                    'platformId': 3,
                    'modeCode': self._get_cookie('mod', 'intl'),
                    'langCode': self._get_cookie('lang', 'en_us'),
                    'deviceId': self._get_cookie('QC005', '')
                }, fatal=False)
            ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none, default=[])
        else:
            ut_list = ['0']

        # bid 0 as an initial format checker
        dash_paths = self._parse_json(PhantomJSwrapper(self, timeout=120_000).get(
            url, note2='Executing signature code (this may take a couple minutes)',
            html='<!DOCTYPE html>', video_id=video_id, jscode=self._DASH_JS % {
                'tvid': video_info['tvId'],
                'vid': video_info['vid'],
                'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
                                    expected_type=str, default='04022001010011000000'),
                'uid': uid,
                'dfp': self._get_cookie('dfp', ''),
                'mode': self._get_cookie('mod', 'intl'),
                'lang': self._get_cookie('lang', 'en_us'),
                'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']',
                'ut_list': '[' + ','.join(ut_list) + ']',
                'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
            })[1].strip(), video_id)

        formats, subtitles = [], {}
        initial_format_data = self._download_json(
            urljoin('https://cache-video.iq.com', dash_paths['0']), video_id,
            note='Downloading initial video format info', errnote='Unable to download initial video format info')['data']
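        # When 'boss_ts' reports 'prv', only a preview is served; warn with the
        # preview length in seconds when it is available.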
        preview_time = traverse_obj(
            initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
        if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
            self.report_warning('This preview video is limited%s' % format_field(preview_time, None, ' to %s seconds'))

        # TODO: Extract audio-only formats
        for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])):
            dash_path = dash_paths.get(bid)
            if not dash_path:
                self.report_warning(f'Unknown format id: {bid}. It is currently not being extracted')
                continue
            format_data = traverse_obj(self._download_json(
                urljoin('https://cache-video.iq.com', dash_path), video_id,
                note=f'Downloading format data for {self._BID_TAGS[bid]}', errnote='Unable to download format data',
                fatal=False), 'data', expected_type=dict)

            video_format = traverse_obj(format_data, ('program', 'video', lambda _, v: str(v['bid']) == bid),
                                        expected_type=dict, default=[], get_all=False) or {}
            extracted_formats = []
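            # A bid's manifest can be delivered three ways: a remote m3u8Url, a
            # remote mpdUrl, or an inline 'm3u8' field holding either an HLS
            # playlist (ff == 'ts') or JSON-wrapped DASH data (ff == 'm4s').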
            if video_format.get('m3u8Url'):
                extracted_formats.extend(self._extract_m3u8_formats(
                    urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['m3u8Url']),
                    'mp4', m3u8_id=bid, fatal=False))
            if video_format.get('mpdUrl'):
                # TODO: Properly extract mpd hostname
                extracted_formats.extend(self._extract_mpd_formats(
                    urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['mpdUrl']),
                    mpd_id=bid, fatal=False))
            if video_format.get('m3u8'):
                ff = video_format.get('ff', 'ts')
                if ff == 'ts':
                    m3u8_formats, _ = self._parse_m3u8_formats_and_subtitles(
                        video_format['m3u8'], ext='mp4', m3u8_id=bid, fatal=False)
                    extracted_formats.extend(m3u8_formats)
                elif ff == 'm4s':
                    mpd_data = traverse_obj(
                        self._parse_json(video_format['m3u8'], video_id, fatal=False), ('payload', ..., 'data'), expected_type=str)
                    if not mpd_data:
                        continue
                    mpd_formats, _ = self._parse_mpd_formats_and_subtitles(
                        mpd_data, bid, format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'))
                    extracted_formats.extend(mpd_formats)
                else:
                    self.report_warning(f'{ff} formats are currently not supported')

            if not extracted_formats:
                if video_format.get('s'):
                    self.report_warning(f'{self._BID_TAGS[bid]} format is restricted')
                else:
                    self.report_warning(f'Unable to extract {self._BID_TAGS[bid]} format')
            for f in extracted_formats:
                f.update({
                    'quality': qualities(list(self._BID_TAGS.keys()))(bid),
                    'format_note': self._BID_TAGS[bid],
                    **parse_resolution(video_format.get('scrsz'))
                })
            formats.extend(extracted_formats)
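        # Subtitles are listed once in the initial response; each entry may
        # carry relative 'srt' and/or 'webvtt' paths, resolved against 'dstl'.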
        for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict, default=[]):
            lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name'))
            subtitles.setdefault(lang, []).extend([{
                'ext': format_ext,
                'url': urljoin(initial_format_data.get('dstl', 'http://meta.video.iqiyi.com'), sub_format[format_key])
            } for format_key, format_ext in [('srt', 'srt'), ('webvtt', 'vtt')] if sub_format.get(format_key)])

        extra_metadata = page_data.get('albumInfo') if video_info.get('albumId') and page_data.get('albumInfo') else video_info
        return {
            'id': video_id,
            'title': video_info['name'],
            'formats': formats,
            'subtitles': subtitles,
            'description': video_info.get('mergeDesc'),
            'duration': parse_duration(video_info.get('len')),
            'age_limit': parse_age_limit(video_info.get('rating')),
            'average_rating': traverse_obj(page_data, ('playScoreInfo', 'score'), expected_type=float_or_none),
            'timestamp': parse_iso8601(video_info.get('isoUploadDate')),
            'categories': traverse_obj(extra_metadata, ('videoTagMap', ..., ..., 'name'), expected_type=str),
            'cast': traverse_obj(extra_metadata, ('actorArr', ..., 'name'), expected_type=str),
            'episode_number': int_or_none(video_info.get('order')) or None,
            'series': video_info.get('albumName'),
        }


class IqAlbumIE(InfoExtractor):
    IE_NAME = 'iq.com:album'
    _VALID_URL = r'https?://(?:www\.)?iq\.com/album/(?:[\w%-]*-)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.iq.com/album/one-piece-1999-1bk9icvr331',
        'info_dict': {
            'id': '1bk9icvr331',
            'title': 'One Piece',
            'description': 'Subtitle available on Sunday 4PM(GMT+8).'
        },
        'playlist_mincount': 238
    }, {
        # Movie/single video
        'url': 'https://www.iq.com/album/九龙城寨-2021-22yjnij099k',
        'info_dict': {
            'ext': 'mp4',
            'id': '22yjnij099k',
            'title': '九龙城寨',
            'description': 'md5:8a09f50b8ba0db4dc69bc7c844228044',
            'duration': 5000,
            'timestamp': 1641911371,
            'upload_date': '20220111',
            'series': '九龙城寨',
            'cast': ['Shi Yan Neng', 'Yu Lang', 'Peter lv', 'Sun Zi Jun', 'Yang Xiao Bo'],
            'age_limit': 13,
            'average_rating': float,
        },
        'expected_warnings': ['format is restricted']
    }]
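    # Albums are paginated: 'totalPageRange' from the Next.js payload supplies
    # the from/to episode orders for each episodeListSource request below.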
    def _entries(self, album_id_num, page_ranges, album_id=None, mode_code='intl', lang_code='en_us'):
        for page_range in page_ranges:
            page = self._download_json(
                f'https://pcw-api.iq.com/api/episodeListSource/{album_id_num}', album_id,
                note=f'Downloading video list episodes {page_range.get("msg", "")}',
                errnote='Unable to download video list', query={
                    'platformId': 3,
                    'modeCode': mode_code,
                    'langCode': lang_code,
                    'endOrder': page_range['to'],
                    'startOrder': page_range['from']
                })
            for video in page['data']['epg']:
                yield self.url_result('https://www.iq.com/play/%s' % (video.get('playLocSuffix') or video['qipuIdStr']),
                                      IqIE.ie_key(), video.get('qipuIdStr'), video.get('name'))

    def _real_extract(self, url):
        album_id = self._match_id(url)
        webpage = self._download_webpage(url, album_id)
        next_data = self._search_nextjs_data(webpage, album_id)
        album_data = next_data['props']['initialState']['album']['videoAlbumInfo']

        if album_data.get('videoType') == 'singleVideo':
            return self.url_result('https://www.iq.com/play/%s' % album_id, IqIE.ie_key())
        return self.playlist_result(
            self._entries(album_data['albumId'], album_data['totalPageRange'], album_id,
                          traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'modeCode')),
                          traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'langCode'))),
            album_id, album_data.get('name'), album_data.get('desc'))