teletask.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import unified_strdate
  4. class TeleTaskIE(InfoExtractor):
  5. _VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P<id>[0-9]+)'
  6. _TEST = {
  7. 'url': 'http://www.tele-task.de/archive/video/html5/26168/',
  8. 'info_dict': {
  9. 'id': '26168',
  10. 'title': 'Duplicate Detection',
  11. },
  12. 'playlist': [{
  13. 'md5': '290ef69fb2792e481169c3958dbfbd57',
  14. 'info_dict': {
  15. 'id': '26168-speaker',
  16. 'ext': 'mp4',
  17. 'title': 'Duplicate Detection',
  18. 'upload_date': '20141218',
  19. }
  20. }, {
  21. 'md5': 'e1e7218c5f0e4790015a437fcf6c71b4',
  22. 'info_dict': {
  23. 'id': '26168-slides',
  24. 'ext': 'mp4',
  25. 'title': 'Duplicate Detection',
  26. 'upload_date': '20141218',
  27. }
  28. }]
  29. }
  30. def _real_extract(self, url):
  31. lecture_id = self._match_id(url)
  32. webpage = self._download_webpage(url, lecture_id)
  33. title = self._html_search_regex(
  34. r'itemprop="name">([^<]+)</a>', webpage, 'title')
  35. upload_date = unified_strdate(self._html_search_regex(
  36. r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False))
  37. entries = [{
  38. 'id': '%s-%s' % (lecture_id, format_id),
  39. 'url': video_url,
  40. 'title': title,
  41. 'upload_date': upload_date,
  42. } for format_id, video_url in re.findall(
  43. r'<video class="([^"]+)"[^>]*>\s*<source src="([^"]+)"', webpage)]
  44. return self.playlist_result(entries, lecture_id, title)