# radiokapital.py
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. clean_html,
  4. traverse_obj,
  5. unescapeHTML,
  6. )
  7. import itertools
  8. from urllib.parse import urlencode
  9. class RadioKapitalBaseIE(InfoExtractor):
  10. def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}):
  11. return self._download_json(
  12. f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}',
  13. video_id, note=note)
  14. def _parse_episode(self, data):
  15. release = '%s%s%s' % (data['published'][6:11], data['published'][3:6], data['published'][:3])
  16. return {
  17. '_type': 'url_transparent',
  18. 'url': data['mixcloud_url'],
  19. 'ie_key': 'Mixcloud',
  20. 'title': unescapeHTML(data['title']),
  21. 'description': clean_html(data.get('content')),
  22. 'tags': traverse_obj(data, ('tags', ..., 'name')),
  23. 'release_date': release,
  24. 'series': traverse_obj(data, ('show', 'title')),
  25. }
  26. class RadioKapitalIE(RadioKapitalBaseIE):
  27. IE_NAME = 'radiokapital'
  28. _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)'
  29. _TESTS = [{
  30. 'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial',
  31. 'info_dict': {
  32. 'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20',
  33. 'ext': 'm4a',
  34. 'title': '#5: It’s okay to\xa0be\xa0immaterial',
  35. 'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4',
  36. 'uploader': 'Radio Kapitał',
  37. 'uploader_id': 'radiokapital',
  38. 'timestamp': 1621640164,
  39. 'upload_date': '20210521',
  40. },
  41. }]
  42. def _real_extract(self, url):
  43. video_id = self._match_id(url)
  44. episode = self._call_api('episodes/%s' % video_id, video_id)
  45. return self._parse_episode(episode)
  46. class RadioKapitalShowIE(RadioKapitalBaseIE):
  47. IE_NAME = 'radiokapital:show'
  48. _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])'
  49. _TESTS = [{
  50. 'url': 'https://radiokapital.pl/shows/wesz',
  51. 'info_dict': {
  52. 'id': '100',
  53. 'title': 'WĘSZ',
  54. 'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c',
  55. },
  56. 'playlist_mincount': 17,
  57. }]
  58. def _get_episode_list(self, series_id, page_no):
  59. return self._call_api(
  60. 'episodes', series_id,
  61. f'Downloading episode list page #{page_no}', qs={
  62. 'show': series_id,
  63. 'page': page_no,
  64. })
  65. def _entries(self, series_id):
  66. for page_no in itertools.count(1):
  67. episode_list = self._get_episode_list(series_id, page_no)
  68. yield from (self._parse_episode(ep) for ep in episode_list['items'])
  69. if episode_list['next'] is None:
  70. break
  71. def _real_extract(self, url):
  72. series_id = self._match_id(url)
  73. show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata')
  74. entries = self._entries(series_id)
  75. return {
  76. '_type': 'playlist',
  77. 'entries': entries,
  78. 'id': str(show['id']),
  79. 'title': show.get('title'),
  80. 'description': clean_html(show.get('content')),
  81. }