closertotruth.py 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. import re
  2. from .common import InfoExtractor
  class CloserToTruthIE(InfoExtractor):
      """Extractor for closertotruth.com pages that embed Kaltura media.

      A page produces either a playlist (when its ``select-version``
      dropdown lists at least one entry) or a single video.  In both cases
      the results are ``url_transparent`` entries pointing at
      ``kaltura:<partner_id>:<entry_id>`` with ``ie_key`` set to ``Kaltura``.
      """

      # The last path component of the URL is captured as the display id.
      _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
      _TESTS = [{
          'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
          'info_dict': {
              'id': '0_zof1ktre',
              'display_id': 'solutions-the-mind-body-problem',
              'ext': 'mov',
              'title': 'Solutions to the Mind-Body Problem?',
              'upload_date': '20140221',
              'timestamp': 1392956007,
              'uploader_id': 'CTTXML',
          },
          'params': {
              'skip_download': True,
          },
      }, {
          'url': 'http://closertotruth.com/episodes/how-do-brains-work',
          'info_dict': {
              'id': '0_iuxai6g6',
              'display_id': 'how-do-brains-work',
              'ext': 'mov',
              'title': 'How do Brains Work?',
              'upload_date': '20140221',
              'timestamp': 1392956024,
              'uploader_id': 'CTTXML',
          },
          'params': {
              'skip_download': True,
          },
      }, {
          'url': 'http://closertotruth.com/interviews/1725',
          'info_dict': {
              'id': '1725',
              'title': 'AyaFr-002',
          },
          'playlist_mincount': 2,
      }]

      def _real_extract(self, url):
          """Build the extraction result for a closertotruth.com page.

          Downloads the page, reads the Kaltura partner id from a
          ``<script src=...>`` URL and the page title, then returns:

          * a playlist of ``url_transparent`` Kaltura entries when the
            ``select-version`` dropdown contains at least one matching
            ``<option>``; duplicate entry ids are skipped, keeping the
            first occurrence;
          * otherwise a single ``url_transparent`` Kaltura result whose
            entry id comes from the ``embed-kaltura`` anchor's
            ``data-kaltura`` attribute.
          """
          display_id = self._match_id(url)
          webpage = self._download_webpage(url, display_id)

          # The partner id follows either "partner_id/" or "p/" in the
          # script URL.
          partner_id = self._search_regex(
              r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
              webpage, 'kaltura partner_id')

          title = self._html_extract_title(webpage, 'video title')

          # Optional: pages without the dropdown fall through to the
          # single-video path below.
          select = self._search_regex(
              r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
              webpage, 'select version', default=None)
          if select:
              entry_ids = set()
              entries = []
              # Scan only the dropdown's own markup.  Searching the whole
              # page would also pick up <option> elements that belong to
              # unrelated <select> controls and turn them into bogus
              # Kaltura entries.
              for mobj in re.finditer(
                      # value is the entry id, optionally followed by a
                      # "#..." suffix that is ignored.
                      r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
                      select):
                  entry_id = mobj.group('id')
                  if entry_id in entry_ids:
                      continue
                  entry_ids.add(entry_id)
                  entries.append({
                      '_type': 'url_transparent',
                      'url': 'kaltura:%s:%s' % (partner_id, entry_id),
                      'ie_key': 'Kaltura',
                      'title': mobj.group('title'),
                  })
              # A dropdown without usable options is treated like a page
              # without a dropdown.
              if entries:
                  return self.playlist_result(entries, display_id, title)

          entry_id = self._search_regex(
              r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
              webpage, 'kaltura entry_id', group='id')

          return {
              '_type': 'url_transparent',
              'display_id': display_id,
              'url': 'kaltura:%s:%s' % (partner_id, entry_id),
              'ie_key': 'Kaltura',
              'title': title,
          }