wppilot.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. try_get,
  4. ExtractorError,
  5. )
  6. import json
  7. import random
  8. import re
  9. class WPPilotBaseIE(InfoExtractor):
  10. _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
  11. _VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s'
  12. _HEADERS_WEB = {
  13. 'Content-Type': 'application/json; charset=UTF-8',
  14. 'Referer': 'https://pilot.wp.pl/tv/',
  15. }
  16. def _get_channel_list(self, cache=True):
  17. if cache is True:
  18. cache_res = self.cache.load('wppilot', 'channel-list')
  19. if cache_res:
  20. return cache_res, True
  21. webpage = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage')
  22. page_data_base_url = self._search_regex(
  23. r'<script src="(https://wp-pilot-gatsby\.wpcdn\.pl/v[\d.-]+/desktop)',
  24. webpage, 'gatsby build version') + '/page-data'
  25. page_data = self._download_json(f'{page_data_base_url}/tv/page-data.json', None, 'Downloading page data')
  26. for qhash in page_data['staticQueryHashes']:
  27. qhash_content = self._download_json(
  28. f'{page_data_base_url}/sq/d/{qhash}.json', None,
  29. 'Searching for channel list')
  30. channel_list = try_get(qhash_content, lambda x: x['data']['allChannels']['nodes'])
  31. if channel_list is None:
  32. continue
  33. self.cache.store('wppilot', 'channel-list', channel_list)
  34. return channel_list, False
  35. raise ExtractorError('Unable to find the channel list')
  36. def _parse_channel(self, chan):
  37. return {
  38. 'id': str(chan['id']),
  39. 'title': chan['name'],
  40. 'is_live': True,
  41. 'thumbnails': [{
  42. 'id': key,
  43. 'url': chan[key],
  44. } for key in ('thumbnail', 'thumbnail_mobile', 'icon') if chan.get(key)],
  45. }
  46. class WPPilotIE(WPPilotBaseIE):
  47. _VALID_URL = r'(?:https?://pilot\.wp\.pl/tv/?#|wppilot:)(?P<id>[a-z\d-]+)'
  48. IE_NAME = 'wppilot'
  49. _TESTS = [{
  50. 'url': 'https://pilot.wp.pl/tv/#telewizja-wp-hd',
  51. 'info_dict': {
  52. 'id': '158',
  53. 'ext': 'mp4',
  54. 'title': 'Telewizja WP HD',
  55. },
  56. 'params': {
  57. 'format': 'bestvideo',
  58. },
  59. }, {
  60. # audio only
  61. 'url': 'https://pilot.wp.pl/tv/#radio-nowy-swiat',
  62. 'info_dict': {
  63. 'id': '238',
  64. 'ext': 'm4a',
  65. 'title': 'Radio Nowy Świat',
  66. },
  67. 'params': {
  68. 'format': 'bestaudio',
  69. },
  70. }, {
  71. 'url': 'wppilot:9',
  72. 'only_matching': True,
  73. }]
  74. def _get_channel(self, id_or_slug):
  75. video_list, is_cached = self._get_channel_list(cache=True)
  76. key = 'id' if re.match(r'^\d+$', id_or_slug) else 'slug'
  77. for video in video_list:
  78. if video.get(key) == id_or_slug:
  79. return self._parse_channel(video)
  80. # if cached channel not found, download and retry
  81. if is_cached:
  82. video_list, _ = self._get_channel_list(cache=False)
  83. for video in video_list:
  84. if video.get(key) == id_or_slug:
  85. return self._parse_channel(video)
  86. raise ExtractorError('Channel not found')
  87. def _real_extract(self, url):
  88. video_id = self._match_id(url)
  89. channel = self._get_channel(video_id)
  90. video_id = str(channel['id'])
  91. is_authorized = next((c for c in self.cookiejar if c.name == 'netviapisessid'), None)
  92. # cookies starting with "g:" are assigned to guests
  93. is_authorized = True if is_authorized is not None and not is_authorized.value.startswith('g:') else False
  94. video = self._download_json(
  95. (self._VIDEO_URL if is_authorized else self._VIDEO_GUEST_URL) % video_id,
  96. video_id, query={
  97. 'device_type': 'web',
  98. }, headers=self._HEADERS_WEB,
  99. expected_status=(200, 422))
  100. stream_token = try_get(video, lambda x: x['_meta']['error']['info']['stream_token'])
  101. if stream_token:
  102. close = self._download_json(
  103. 'https://pilot.wp.pl/api/v1/channels/close', video_id,
  104. 'Invalidating previous stream session', headers=self._HEADERS_WEB,
  105. data=json.dumps({
  106. 'channelId': video_id,
  107. 't': stream_token,
  108. }).encode('utf-8'))
  109. if try_get(close, lambda x: x['data']['status']) == 'ok':
  110. return self.url_result(url, ie=WPPilotIE.ie_key())
  111. formats = []
  112. for fmt in video['data']['stream_channel']['streams']:
  113. # live DASH does not work for now
  114. # if fmt['type'] == 'dash@live:abr':
  115. # formats.extend(
  116. # self._extract_mpd_formats(
  117. # random.choice(fmt['url']), video_id))
  118. if fmt['type'] == 'hls@live:abr':
  119. formats.extend(
  120. self._extract_m3u8_formats(
  121. random.choice(fmt['url']),
  122. video_id, live=True))
  123. channel['formats'] = formats
  124. return channel
  125. class WPPilotChannelsIE(WPPilotBaseIE):
  126. _VALID_URL = r'(?:https?://pilot\.wp\.pl/(?:tv/?)?(?:\?[^#]*)?#?|wppilot:)$'
  127. IE_NAME = 'wppilot:channels'
  128. _TESTS = [{
  129. 'url': 'wppilot:',
  130. 'info_dict': {
  131. 'id': 'wppilot',
  132. 'title': 'WP Pilot',
  133. },
  134. 'playlist_mincount': 100,
  135. }, {
  136. 'url': 'https://pilot.wp.pl/',
  137. 'only_matching': True,
  138. }]
  139. def _entries(self):
  140. channel_list, _ = self._get_channel_list()
  141. for chan in channel_list:
  142. entry = self._parse_channel(chan)
  143. entry.update({
  144. '_type': 'url_transparent',
  145. 'url': f'wppilot:{chan["id"]}',
  146. 'ie_key': WPPilotIE.ie_key(),
  147. })
  148. yield entry
  149. def _real_extract(self, url):
  150. return self.playlist_result(self._entries(), 'wppilot', 'WP Pilot')