amara.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. from .common import InfoExtractor
  2. from .youtube import YoutubeIE
  3. from .vimeo import VimeoIE
  4. from ..utils import (
  5. int_or_none,
  6. parse_iso8601,
  7. update_url_query,
  8. )
  9. class AmaraIE(InfoExtractor):
  10. _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
  11. _TESTS = [{
  12. # Youtube
  13. 'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
  14. 'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
  15. 'info_dict': {
  16. 'id': 'h6ZuVdvYnfE',
  17. 'ext': 'mp4',
  18. 'title': 'Why jury trials are becoming less common',
  19. 'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
  20. 'thumbnail': r're:^https?://.*\.jpg$',
  21. 'subtitles': dict,
  22. 'upload_date': '20160813',
  23. 'uploader': 'PBS NewsHour',
  24. 'uploader_id': 'PBSNewsHour',
  25. 'timestamp': 1549639570,
  26. }
  27. }, {
  28. # Vimeo
  29. 'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
  30. 'md5': '99392c75fa05d432a8f11df03612195e',
  31. 'info_dict': {
  32. 'id': '18622084',
  33. 'ext': 'mov',
  34. 'title': 'Vimeo at CES 2011!',
  35. 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
  36. 'thumbnail': r're:^https?://.*\.jpg$',
  37. 'subtitles': dict,
  38. 'timestamp': 1294763658,
  39. 'upload_date': '20110111',
  40. 'uploader': 'Sam Morrill',
  41. 'uploader_id': 'sammorrill'
  42. }
  43. }, {
  44. # Direct Link
  45. 'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
  46. 'md5': 'd3970f08512738ee60c5807311ff5d3f',
  47. 'info_dict': {
  48. 'id': 's8KL7I3jLmh6',
  49. 'ext': 'mp4',
  50. 'title': 'The danger of a single story',
  51. 'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
  52. 'thumbnail': r're:^https?://.*\.jpg$',
  53. 'subtitles': dict,
  54. 'upload_date': '20091007',
  55. 'timestamp': 1254942511,
  56. }
  57. }]
  58. def _real_extract(self, url):
  59. video_id = self._match_id(url)
  60. meta = self._download_json(
  61. 'https://amara.org/api/videos/%s/' % video_id,
  62. video_id, query={'format': 'json'})
  63. title = meta['title']
  64. video_url = meta['all_urls'][0]
  65. subtitles = {}
  66. for language in (meta.get('languages') or []):
  67. subtitles_uri = language.get('subtitles_uri')
  68. if not (subtitles_uri and language.get('published')):
  69. continue
  70. subtitle = subtitles.setdefault(language.get('code') or 'en', [])
  71. for f in ('json', 'srt', 'vtt'):
  72. subtitle.append({
  73. 'ext': f,
  74. 'url': update_url_query(subtitles_uri, {'format': f}),
  75. })
  76. info = {
  77. 'url': video_url,
  78. 'id': video_id,
  79. 'subtitles': subtitles,
  80. 'title': title,
  81. 'description': meta.get('description'),
  82. 'thumbnail': meta.get('thumbnail'),
  83. 'duration': int_or_none(meta.get('duration')),
  84. 'timestamp': parse_iso8601(meta.get('created')),
  85. }
  86. for ie in (YoutubeIE, VimeoIE):
  87. if ie.suitable(video_url):
  88. info.update({
  89. '_type': 'url_transparent',
  90. 'ie_key': ie.ie_key(),
  91. })
  92. break
  93. return info