# abematv.py

import base64
import binascii
import functools
import hashlib
import hmac
import io
import json
import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid

from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
    ExtractorError,
    bytes_to_intlist,
    decode_base_n,
    int_or_none,
    intlist_to_bytes,
    OnDemandPagedList,
    request_to_url,
    time_seconds,
    traverse_obj,
    update_url_query,
)


# NOTE: the network handler related code is a temporary measure until the network stack overhaul PRs are merged (#2861/#2862)


def add_opener(ydl, handler):
    ''' Add a handler for opening URLs, like _download_webpage '''
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
    ydl._opener.add_handler(handler)


def remove_opener(ydl, handler):
    '''
    Remove handler(s) for opening URLs
    @param handler Either the handler object itself or a handler type.
                   Specifying a handler type removes every handler for which isinstance returns True.
    '''
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
    if isinstance(handler, (type, tuple)):
        find_cp = lambda x: isinstance(x, handler)
    else:
        find_cp = lambda x: x is handler

    removed = []
    for meth in dir(handler):
        if meth in ["redirect_request", "do_open", "proxy_open"]:
            # oops, coincidental match
            continue

        i = meth.find("_")
        protocol = meth[:i]
        condition = meth[i + 1:]

        if condition.startswith("error"):
            j = condition.find("_") + i + 1
            kind = meth[j + 1:]
            try:
                kind = int(kind)
            except ValueError:
                pass
            lookup = opener.handle_error.get(protocol, {})
            opener.handle_error[protocol] = lookup
        elif condition == "open":
            kind = protocol
            lookup = opener.handle_open
        elif condition == "response":
            kind = protocol
            lookup = opener.process_response
        elif condition == "request":
            kind = protocol
            lookup = opener.process_request
        else:
            continue

        handlers = lookup.setdefault(kind, [])
        if handlers:
            handlers[:] = [x for x in handlers if not find_cp(x)]
            removed.append(x for x in handlers if find_cp(x))

    if removed:
        for x in opener.handlers:
            if find_cp(x):
                x.add_parent(None)
        opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
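

# Illustrative use of the helpers above (a sketch; nothing in this module calls it at
# import time): remove_opener() accepts either a handler instance or a handler class,
# and passing the class removes every installed instance of it, e.g.
#
#     add_opener(ydl, AbemaLicenseHandler(ie))   # install the license handler
#     remove_opener(ydl, AbemaLicenseHandler)    # drop all AbemaLicenseHandler instances
#
# which is how AbemaTVBaseIE._get_device_token() below re-registers the handler.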


class AbemaLicenseHandler(urllib.request.BaseHandler):
    handler_order = 499
    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
    HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

    def __init__(self, ie: 'AbemaTVIE'):
        # the protocol that this should really handle is 'abematv-license://'
        # abematv_license_open is just a placeholder for development purposes
        # ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
        setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open'))
        self.ie = ie
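
    # Key handling in _get_videokey_from_ticket() below: the license server returns 'k'
    # as a Base58-style string (STRTABLE omits 0, I, O and l); decoded and packed
    # big-endian into 16 bytes it is the encrypted video key, which is AES-ECB-decrypted
    # with HMAC-SHA256(unhexlify(HKEY), cid + device id) to yield the 16-byte HLS key.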

    def _get_videokey_from_ticket(self, ticket):
        to_show = self.ie.get_param('verbose', False)
        media_token = self.ie._get_media_token(to_show=to_show)
        license_response = self.ie._download_json(
            'https://license.abema.io/abematv-hls', None, note='Requesting playback license' if to_show else False,
            query={'t': media_token},
            data=json.dumps({
                'kv': 'a',
                'lt': ticket
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })

        res = decode_base_n(license_response['k'], table=self.STRTABLE)
        encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))

        h = hmac.new(
            binascii.unhexlify(self.HKEY),
            (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
            digestmod=hashlib.sha256)
        enckey = bytes_to_intlist(h.digest())

        return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))

    def abematv_license_open(self, url):
        url = request_to_url(url)
        ticket = urllib.parse.urlparse(url).netloc
        response_data = self._get_videokey_from_ticket(ticket)
        return urllib.response.addinfourl(io.BytesIO(response_data), headers={
            'Content-Length': len(response_data),
        }, url=url, code=200)
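

# How AbemaLicenseHandler above is consumed (a hedged sketch): AbemaTV's HLS manifests
# appear to reference their decryption key through 'abematv-license://<ticket>' URIs, so
# once the handler is installed on ydl's opener (see _get_device_token() below), opening
# such a URI returns the decrypted 16-byte key as the response body, roughly:
#
#     key = ydl._opener.open('abematv-license://SOME_TICKET').read()  # illustrative only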


class AbemaTVBaseIE(InfoExtractor):
    _USERTOKEN = None
    _DEVICE_ID = None
    _MEDIATOKEN = None

    _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'

    @classmethod
    def _generate_aks(cls, deviceid):
        deviceid = deviceid.encode('utf-8')
        # add 1 hour and then drop minute and secs
        ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600)
        time_struct = time.gmtime(ts_1hour)
        ts_1hour_str = str(ts_1hour).encode('utf-8')

        tmp = None

        def mix_once(nonce):
            nonlocal tmp
            h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
            h.update(nonce)
            tmp = h.digest()

        def mix_tmp(count):
            nonlocal tmp
            for i in range(count):
                mix_once(tmp)

        def mix_twist(nonce):
            nonlocal tmp
            mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)

        mix_once(cls._SECRETKEY)
        mix_tmp(time_struct.tm_mon)
        mix_twist(deviceid)
        mix_tmp(time_struct.tm_mday % 5)
        mix_twist(ts_1hour_str)
        mix_tmp(time_struct.tm_hour % 5)

        return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8')
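
    # Illustrative call of _generate_aks() above (the device id is a made-up example);
    # the result depends only on the device id and the current JST hour:
    #
    #     aks = AbemaTVBaseIE._generate_aks('26e1496e-2f33-4f1c-8fdb-4ab62b6bfde3')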

    def _get_device_token(self):
        if self._USERTOKEN:
            return self._USERTOKEN

        AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
        aks = self._generate_aks(self._DEVICE_ID)
        user_data = self._download_json(
            'https://api.abema.io/v1/users', None, note='Authorizing',
            data=json.dumps({
                'deviceId': self._DEVICE_ID,
                'applicationKeySecret': aks,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })
        AbemaTVBaseIE._USERTOKEN = user_data['token']

        # don't allow adding it 2 times or more, though it's guarded
        remove_opener(self._downloader, AbemaLicenseHandler)
        add_opener(self._downloader, AbemaLicenseHandler(self))

        return self._USERTOKEN
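
    # Note that _USERTOKEN, _DEVICE_ID and _MEDIATOKEN are assigned on AbemaTVBaseIE
    # itself, so one device registration and its tokens are reused by every extractor
    # instance in the same run instead of being requested again per video.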

    def _get_media_token(self, invalidate=False, to_show=True):
        if not invalidate and self._MEDIATOKEN:
            return self._MEDIATOKEN

        AbemaTVBaseIE._MEDIATOKEN = self._download_json(
            'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
            query={
                'osName': 'android',
                'osVersion': '6.0.1',
                'osLang': 'ja_JP',
                'osTimezone': 'Asia/Tokyo',
                'appId': 'tv.abema',
                'appVersion': '3.27.1'
            }, headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })['token']

        return self._MEDIATOKEN

    def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
        return self._download_json(
            f'https://api.abema.io/{endpoint}', video_id, query=query or {},
            note=note,
            headers={
                'Authorization': f'bearer {self._get_device_token()}',
            })
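
    # _extract_breadcrumb_list() below pulls the BreadcrumbList JSON-LD out of the page;
    # for an episode page it returns something like (hypothetical values)
    #     ['ホーム', 'アニメ', '異世界食堂2', '第1話 「チーズケーキ」 「モーニング再び」']
    # i.e. home > genre > series > episode title, which is why _real_extract() takes
    # series and episode from the last two entries.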

    def _extract_breadcrumb_list(self, webpage, video_id):
        for jld in re.finditer(
                r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                webpage):
            jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
            if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
                continue
            items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
            if items:
                return items
        return []


class AbemaTVIE(AbemaTVBaseIE):
    _VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
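    # Example URLs accepted by _VALID_URL (taken from the tests below):
    #   https://abema.tv/now-on-air/abema-anime                     type='now-on-air',                 id='abema-anime'
    #   https://abema.tv/video/episode/194-25_s2_p1                 type='video/episode',              id='194-25_s2_p1'
    #   https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d  type='channels/anime-live2/slots', id='E8tvAnMJ7a9a5d'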
    _NETRC_MACHINE = 'abematv'
    _TESTS = [{
        'url': 'https://abema.tv/video/episode/194-25_s2_p1',
        'info_dict': {
            'id': '194-25_s2_p1',
            'title': '第1話 「チーズケーキ」 「モーニング再び」',
            'series': '異世界食堂2',
            'series_number': 2,
            'episode': '第1話 「チーズケーキ」 「モーニング再び」',
            'episode_number': 1,
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
        'info_dict': {
            'id': 'E8tvAnMJ7a9a5d',
            'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series': 'ゆるキャン△ SEASON2',
            'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
            'series_number': 2,
            'episode_number': 1,
            'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
        'info_dict': {
            'id': '87-877_s1282_p31047',
            'title': '第5話『光射す』',
            'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
            'thumbnail': r're:https://hayabusa\.io/.+',
            'series': '相棒',
            'episode': '第5話『光射す』',
        },
        'skip': 'expired',
    }, {
        'url': 'https://abema.tv/now-on-air/abema-anime',
        'info_dict': {
            'id': 'abema-anime',
            # this varies
            # 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
            'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
            'is_live': True,
        },
        'skip': 'Not supported until hypervideo implements native live downloader OR AbemaTV can start a local HTTP server',
    }]
    _TIMETABLE = None

    def _perform_login(self, username, password):
        if '@' in username:  # don't strictly check whether it's an email address
            ep, method = 'user/email', 'email'
        else:
            ep, method = 'oneTimePassword', 'userId'

        login_response = self._download_json(
            f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
            data=json.dumps({
                method: username,
                'password': password
            }).encode('utf-8'), headers={
                'Authorization': f'bearer {self._get_device_token()}',
                'Origin': 'https://abema.tv',
                'Referer': 'https://abema.tv/',
                'Content-Type': 'application/json',
            })

        AbemaTVBaseIE._USERTOKEN = login_response['token']
        self._get_media_token(True)

    def _real_extract(self, url):
        # Starting a download from the infojson produced by this extractor is undefined
        # behavior and will never be fixed; you must trigger downloads by specifying the
        # URL directly (unless there's a way to hook before downloading by extractor).
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        headers = {
            'Authorization': 'Bearer ' + self._get_device_token(),
        }
        video_type = video_type.split('/')[-1]

        webpage = self._download_webpage(url, video_id)
        canonical_url = self._search_regex(
            r'<link\s+rel="canonical"\s*href="(.+?)"', webpage, 'canonical URL',
            default=url)
        info = self._search_json_ld(webpage, video_id, default={})

        title = self._search_regex(
            r'<span\s*class=".+?EpisodeTitleBlock__title">(.+?)</span>', webpage, 'title', default=None)
        if not title:
            jsonld = None
            for jld in re.finditer(
                    r'(?is)<span\s*class="com-m-Thumbnail__image">(?:</span>)?<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
                    webpage):
                jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
                if jsonld:
                    break
            if jsonld:
                title = jsonld.get('caption')
        if not title and video_type == 'now-on-air':
            if not self._TIMETABLE:
                # cache the timetable because it grows to 5 MiB in size (!!)
                self._TIMETABLE = self._download_json(
                    'https://api.abema.io/v1/timetable/dataSet?debug=false', video_id,
                    headers=headers)

            now = time_seconds(hours=9)
            for slot in self._TIMETABLE.get('slots', []):
                if slot.get('channelId') != video_id:
                    continue
                if slot['startAt'] <= now < slot['endAt']:
                    title = slot['title']
                    break

        # read breadcrumb on top of page
        breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
        if breadcrumb:
            # breadcrumb list translates to: (e.g. 1st test for this IE)
            # Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
            # hence this works
            info['series'] = breadcrumb[-2]
            info['episode'] = breadcrumb[-1]
            if not title:
                title = info['episode']

        description = self._html_search_regex(
            (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
             r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
            webpage, 'description', default=None, group=1)
        if not description:
            og_desc = self._html_search_meta(
                ('description', 'og:description', 'twitter:description'), webpage)
            if og_desc:
                description = re.sub(r'''(?sx)
                    ^(.+?)(?:
                        アニメの動画を無料で見るならABEMA!| # anime
                        等、.+ # applies for most of categories
                    )?
                ''', r'\1', og_desc)

        # canonical URL may contain series and episode number
        mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
        if mobj:
            seri = int_or_none(mobj.group(1), default=float('inf'))
            epis = int_or_none(mobj.group(2), default=float('inf'))
            info['series_number'] = seri if seri < 100 else None
            # some anime, like Detective Conan (though not available on AbemaTV),
            # have more than 1000 episodes (1026 as of 2021/11/15)
            info['episode_number'] = epis if epis < 2000 else None
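
        # e.g. a canonical URL ending in '..._s2_p1' (as in the first test) gives
        # series_number 2 and episode_number 1; values past the cut-offs become None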

        is_live, m3u8_url = False, None
        if video_type == 'now-on-air':
            is_live = True
            channel_url = 'https://api.abema.io/v1/channels'
            if video_id == 'news-global':
                channel_url = update_url_query(channel_url, {'division': '1'})
            onair_channels = self._download_json(channel_url, video_id)
            for ch in onair_channels['channels']:
                if video_id == ch['id']:
                    m3u8_url = ch['playback']['hls']
                    break
            else:
                raise ExtractorError(f'Cannot find on-air {video_id} channel.', expected=True)
        elif video_type == 'episode':
            api_response = self._download_json(
                f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'), default=[])
            if 3 not in ondemand_types:
                # cannot acquire decryption key for these streams
                self.report_warning('This is a premium-only stream')
            m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
        elif video_type == 'slots':
            api_response = self._download_json(
                f'https://api.abema.io/v1/media/slots/{video_id}', video_id,
                note='Checking playability',
                headers=headers)
            if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
                self.report_warning('This is a premium-only stream')
            m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
        else:
            raise ExtractorError('Unreachable')

        if is_live:
            self.report_warning("This is a livestream; hypervideo doesn't support downloading it natively, and FFmpeg cannot handle m3u8 manifests from AbemaTV")
            self.report_warning('Please consider using Streamlink to download these streams (https://github.com/streamlink/streamlink)')
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', live=is_live)

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'is_live': is_live,
        })
        return info


class AbemaTVTitleIE(AbemaTVBaseIE):
    _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
    _PAGE_SIZE = 25

    _TESTS = [{
        'url': 'https://abema.tv/video/title/90-1597',
        'info_dict': {
            'id': '90-1597',
            'title': 'シャッフルアイランド',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://abema.tv/video/title/193-132',
        'info_dict': {
            'id': '193-132',
            'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://abema.tv/video/title/25-102',
        'info_dict': {
            'id': '25-102',
            'title': 'ソードアート・オンライン アリシゼーション',
        },
        'playlist_mincount': 24,
    }]

    def _fetch_page(self, playlist_id, series_version, page):
        programs = self._call_api(
            f'v1/video/series/{playlist_id}/programs', playlist_id,
            note=f'Downloading page {page + 1}',
            query={
                'seriesVersion': series_version,
                'offset': str(page * self._PAGE_SIZE),
                'order': 'seq',
                'limit': str(self._PAGE_SIZE),
            })
        yield from (
            self.url_result(f'https://abema.tv/video/episode/{x}')
            for x in traverse_obj(programs, ('programs', ..., 'id'), default=[]))

    def _entries(self, playlist_id, series_version):
        return OnDemandPagedList(
            functools.partial(self._fetch_page, playlist_id, series_version),
            self._PAGE_SIZE)
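
    # OnDemandPagedList keeps the pagination lazy: _fetch_page() above is only called for
    # the pages that are actually needed (e.g. when --playlist-items narrows the
    # selection), each page holding _PAGE_SIZE programs.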

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)

        return self.playlist_result(
            self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
            playlist_title=series_info.get('title'),
            playlist_description=series_info.get('content'))