europa.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. int_or_none,
  4. orderedSet,
  5. parse_duration,
  6. parse_qs,
  7. qualities,
  8. unified_strdate,
  9. xpath_text
  10. )
  11. class EuropaIE(InfoExtractor):
  12. _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
  13. _TESTS = [{
  14. 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
  15. 'md5': '574f080699ddd1e19a675b0ddf010371',
  16. 'info_dict': {
  17. 'id': 'I107758',
  18. 'ext': 'mp4',
  19. 'title': 'TRADE - Wikileaks on TTIP',
  20. 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
  21. 'thumbnail': r're:^https?://.*\.jpg$',
  22. 'upload_date': '20150811',
  23. 'duration': 34,
  24. 'view_count': int,
  25. 'formats': 'mincount:3',
  26. }
  27. }, {
  28. 'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
  29. 'only_matching': True,
  30. }, {
  31. 'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
  32. 'only_matching': True,
  33. }]
  34. def _real_extract(self, url):
  35. video_id = self._match_id(url)
  36. playlist = self._download_xml(
  37. 'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id)
  38. def get_item(type_, preference):
  39. items = {}
  40. for item in playlist.findall('./info/%s/item' % type_):
  41. lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None)
  42. if lang and label:
  43. items[lang] = label.strip()
  44. for p in preference:
  45. if items.get(p):
  46. return items[p]
  47. query = parse_qs(url)
  48. preferred_lang = query.get('sitelang', ('en', ))[0]
  49. preferred_langs = orderedSet((preferred_lang, 'en', 'int'))
  50. title = get_item('title', preferred_langs) or video_id
  51. description = get_item('description', preferred_langs)
  52. thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
  53. upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
  54. duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
  55. view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
  56. language_preference = qualities(preferred_langs[::-1])
  57. formats = []
  58. for file_ in playlist.findall('./files/file'):
  59. video_url = xpath_text(file_, './url')
  60. if not video_url:
  61. continue
  62. lang = xpath_text(file_, './lg')
  63. formats.append({
  64. 'url': video_url,
  65. 'format_id': lang,
  66. 'format_note': xpath_text(file_, './lglabel'),
  67. 'language_preference': language_preference(lang)
  68. })
  69. return {
  70. 'id': video_id,
  71. 'title': title,
  72. 'description': description,
  73. 'thumbnail': thumbnail,
  74. 'upload_date': upload_date,
  75. 'duration': duration,
  76. 'view_count': view_count,
  77. 'formats': formats
  78. }