echomsk.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import re
  2. from .common import InfoExtractor
  3. class EchoMskIE(InfoExtractor):
  4. _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
  5. _TEST = {
  6. 'url': 'http://www.echo.msk.ru/sounds/1464134.html',
  7. 'md5': '2e44b3b78daff5b458e4dbc37f191f7c',
  8. 'info_dict': {
  9. 'id': '1464134',
  10. 'ext': 'mp3',
  11. 'title': 'Особое мнение - 29 декабря 2014, 19:08',
  12. },
  13. }
  14. def _real_extract(self, url):
  15. video_id = self._match_id(url)
  16. webpage = self._download_webpage(url, video_id)
  17. audio_url = self._search_regex(
  18. r'<a rel="mp3" href="([^"]+)">', webpage, 'audio URL')
  19. title = self._html_search_regex(
  20. r'<a href="/programs/[^"]+" target="_blank">([^<]+)</a>',
  21. webpage, 'title')
  22. air_date = self._html_search_regex(
  23. r'(?s)<div class="date">(.+?)</div>',
  24. webpage, 'date', fatal=False, default=None)
  25. if air_date:
  26. air_date = re.sub(r'(\s)\1+', r'\1', air_date)
  27. if air_date:
  28. title = '%s - %s' % (title, air_date)
  29. return {
  30. 'id': video_id,
  31. 'url': audio_url,
  32. 'title': title,
  33. }