expressen.py 3.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. from .common import InfoExtractor
  2. from ..utils import (
  3. determine_ext,
  4. int_or_none,
  5. unescapeHTML,
  6. unified_timestamp,
  7. )
  8. class ExpressenIE(InfoExtractor):
  9. _VALID_URL = r'''(?x)
  10. https?://
  11. (?:www\.)?(?:expressen|di)\.se/
  12. (?:(?:tvspelare/video|videoplayer/embed)/)?
  13. tv/(?:[^/]+/)*
  14. (?P<id>[^/?#&]+)
  15. '''
  16. _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
  17. _TESTS = [{
  18. 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
  19. 'md5': 'deb2ca62e7b1dcd19fa18ba37523f66e',
  20. 'info_dict': {
  21. 'id': 'ba90f5a9-78d1-4511-aa02-c177b9c99136',
  22. 'display_id': 'ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden',
  23. 'ext': 'mp4',
  24. 'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
  25. 'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
  26. 'thumbnail': r're:^https?://.*\.jpg$',
  27. 'duration': 788,
  28. 'timestamp': 1526639109,
  29. 'upload_date': '20180518',
  30. },
  31. }, {
  32. 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
  33. 'only_matching': True,
  34. }, {
  35. 'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
  36. 'only_matching': True,
  37. }, {
  38. 'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
  39. 'only_matching': True,
  40. }, {
  41. 'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
  42. 'only_matching': True,
  43. }]
  44. def _real_extract(self, url):
  45. display_id = self._match_id(url)
  46. webpage = self._download_webpage(url, display_id)
  47. def extract_data(name):
  48. return self._parse_json(
  49. self._search_regex(
  50. r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
  51. webpage, 'info', group='value'),
  52. display_id, transform_source=unescapeHTML)
  53. info = extract_data('video-tracking-info')
  54. video_id = info['contentId']
  55. data = extract_data('article-data')
  56. stream = data['stream']
  57. if determine_ext(stream) == 'm3u8':
  58. formats = self._extract_m3u8_formats(
  59. stream, display_id, 'mp4', entry_protocol='m3u8_native',
  60. m3u8_id='hls')
  61. else:
  62. formats = [{
  63. 'url': stream,
  64. }]
  65. title = info.get('titleRaw') or data['title']
  66. description = info.get('descriptionRaw')
  67. thumbnail = info.get('socialMediaImage') or data.get('image')
  68. duration = int_or_none(info.get('videoTotalSecondsDuration')
  69. or data.get('totalSecondsDuration'))
  70. timestamp = unified_timestamp(info.get('publishDate'))
  71. return {
  72. 'id': video_id,
  73. 'display_id': display_id,
  74. 'title': title,
  75. 'description': description,
  76. 'thumbnail': thumbnail,
  77. 'duration': duration,
  78. 'timestamp': timestamp,
  79. 'formats': formats,
  80. }