iheart.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. clean_html,
  4. clean_podcast_url,
  5. int_or_none,
  6. str_or_none,
  7. )
  8. class IHeartRadioBaseIE(InfoExtractor):
  9. def _call_api(self, path, video_id, fatal=True, query=None):
  10. return self._download_json(
  11. 'https://api.iheart.com/api/v3/podcast/' + path,
  12. video_id, fatal=fatal, query=query)
  13. def _extract_episode(self, episode):
  14. return {
  15. 'thumbnail': episode.get('imageUrl'),
  16. 'description': clean_html(episode.get('description')),
  17. 'timestamp': int_or_none(episode.get('startDate'), 1000),
  18. 'duration': int_or_none(episode.get('duration')),
  19. }
  20. class IHeartRadioIE(IHeartRadioBaseIE):
  21. IENAME = 'iheartradio'
  22. _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
  23. _TEST = {
  24. 'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
  25. 'md5': 'c8609c92c8688dcb69d8541042b8abca',
  26. 'info_dict': {
  27. 'id': '70346499',
  28. 'ext': 'mp3',
  29. 'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
  30. 'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
  31. 'timestamp': 1597741200,
  32. 'upload_date': '20200818',
  33. }
  34. }
  35. def _real_extract(self, url):
  36. episode_id = self._match_id(url)
  37. episode = self._call_api(
  38. 'episodes/' + episode_id, episode_id)['episode']
  39. info = self._extract_episode(episode)
  40. info.update({
  41. 'id': episode_id,
  42. 'title': episode['title'],
  43. 'url': clean_podcast_url(episode['mediaUrl']),
  44. })
  45. return info
  46. class IHeartRadioPodcastIE(IHeartRadioBaseIE):
  47. IE_NAME = 'iheartradio:podcast'
  48. _VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
  49. _TESTS = [{
  50. 'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
  51. 'info_dict': {
  52. 'id': '30717896',
  53. 'title': 'It Could Happen Here',
  54. 'description': 'md5:5842117412a967eb0b01f8088eb663e2',
  55. },
  56. 'playlist_mincount': 11,
  57. }, {
  58. 'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
  59. 'only_matching': True,
  60. }]
  61. def _real_extract(self, url):
  62. podcast_id = self._match_id(url)
  63. path = 'podcasts/' + podcast_id
  64. episodes = self._call_api(
  65. path + '/episodes', podcast_id, query={'limit': 1000000000})['data']
  66. entries = []
  67. for episode in episodes:
  68. episode_id = str_or_none(episode.get('id'))
  69. if not episode_id:
  70. continue
  71. info = self._extract_episode(episode)
  72. info.update({
  73. '_type': 'url',
  74. 'id': episode_id,
  75. 'title': episode.get('title'),
  76. 'url': 'iheartradio:' + episode_id,
  77. 'ie_key': IHeartRadioIE.ie_key(),
  78. })
  79. entries.append(info)
  80. podcast = self._call_api(path, podcast_id, False) or {}
  81. return self.playlist_result(
  82. entries, podcast_id, podcast.get('title'), podcast.get('description'))