vrak.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import re
  2. from .common import InfoExtractor
  3. from .brightcove import BrightcoveNewIE
  4. from ..utils import (
  5. int_or_none,
  6. parse_age_limit,
  7. smuggle_url,
  8. unescapeHTML,
  9. )
  10. class VrakIE(InfoExtractor):
  11. _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?\btarget=(?P<id>[\d.]+)'
  12. _TEST = {
  13. 'url': 'http://www.vrak.tv/videos?target=1.2306782&filtre=emission&id=1.1806721',
  14. 'info_dict': {
  15. 'id': '5345661243001',
  16. 'ext': 'mp4',
  17. 'title': 'Obésité, film de hockey et Roseline Filion',
  18. 'timestamp': 1488492126,
  19. 'upload_date': '20170302',
  20. 'uploader_id': '2890187628001',
  21. 'creator': 'VRAK.TV',
  22. 'age_limit': 8,
  23. 'series': 'ALT (Actualité Légèrement Tordue)',
  24. 'episode': 'Obésité, film de hockey et Roseline Filion',
  25. 'tags': list,
  26. },
  27. 'params': {
  28. 'skip_download': True,
  29. },
  30. }
  31. BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2890187628001/default_default/index.html?videoId=%s'
  32. def _real_extract(self, url):
  33. video_id = self._match_id(url)
  34. webpage = self._download_webpage(url, video_id)
  35. title = self._html_search_regex(
  36. r'<h\d\b[^>]+\bclass=["\']videoTitle["\'][^>]*>([^<]+)',
  37. webpage, 'title', default=None) or self._og_search_title(webpage)
  38. content = self._parse_json(
  39. self._search_regex(
  40. r'data-player-options-content=(["\'])(?P<content>{.+?})\1',
  41. webpage, 'content', default='{}', group='content'),
  42. video_id, transform_source=unescapeHTML)
  43. ref_id = content.get('refId') or self._search_regex(
  44. r'refId&quot;:&quot;([^&]+)&quot;', webpage, 'ref id')
  45. brightcove_id = self._search_regex(
  46. r'''(?x)
  47. java\.lang\.String\s+value\s*=\s*["']brightcove\.article\.\d+\.%s
  48. [^>]*
  49. java\.lang\.String\s+value\s*=\s*["'](\d+)
  50. ''' % re.escape(ref_id), webpage, 'brightcove id')
  51. return {
  52. '_type': 'url_transparent',
  53. 'ie_key': BrightcoveNewIE.ie_key(),
  54. 'url': smuggle_url(
  55. self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
  56. {'geo_countries': ['CA']}),
  57. 'id': brightcove_id,
  58. 'description': content.get('description'),
  59. 'creator': content.get('brand'),
  60. 'age_limit': parse_age_limit(content.get('rating')),
  61. 'series': content.get('showName') or content.get(
  62. 'episodeName'), # this is intentional
  63. 'season_number': int_or_none(content.get('seasonNumber')),
  64. 'episode': title,
  65. 'episode_number': int_or_none(content.get('episodeNumber')),
  66. 'tags': content.get('tags', []),
  67. }