# coub.py (4.5 KB)

  1. from .common import InfoExtractor
  2. from ..utils import (
  3. ExtractorError,
  4. float_or_none,
  5. int_or_none,
  6. parse_iso8601,
  7. qualities,
  8. )
  9. class CoubIE(InfoExtractor):
  10. _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'
  11. _TESTS = [{
  12. 'url': 'http://coub.com/view/5u5n1',
  13. 'info_dict': {
  14. 'id': '5u5n1',
  15. 'ext': 'mp4',
  16. 'title': 'The Matrix Moonwalk',
  17. 'thumbnail': r're:^https?://.*\.jpg$',
  18. 'duration': 4.6,
  19. 'timestamp': 1428527772,
  20. 'upload_date': '20150408',
  21. 'uploader': 'Artyom Loskutnikov',
  22. 'uploader_id': 'artyom.loskutnikov',
  23. 'view_count': int,
  24. 'like_count': int,
  25. 'repost_count': int,
  26. 'age_limit': 0,
  27. },
  28. }, {
  29. 'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
  30. 'only_matching': True,
  31. }, {
  32. 'url': 'coub:5u5n1',
  33. 'only_matching': True,
  34. }, {
  35. # longer video id
  36. 'url': 'http://coub.com/view/237d5l5h',
  37. 'only_matching': True,
  38. }]
  39. def _real_extract(self, url):
  40. video_id = self._match_id(url)
  41. coub = self._download_json(
  42. 'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)
  43. if coub.get('error'):
  44. raise ExtractorError(
  45. '%s said: %s' % (self.IE_NAME, coub['error']), expected=True)
  46. title = coub['title']
  47. file_versions = coub['file_versions']
  48. QUALITIES = ('low', 'med', 'high', 'higher')
  49. MOBILE = 'mobile'
  50. IPHONE = 'iphone'
  51. HTML5 = 'html5'
  52. SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)
  53. quality_key = qualities(QUALITIES)
  54. preference_key = qualities(SOURCE_PREFERENCE)
  55. formats = []
  56. for kind, items in file_versions.get(HTML5, {}).items():
  57. if kind not in ('video', 'audio'):
  58. continue
  59. if not isinstance(items, dict):
  60. continue
  61. for quality, item in items.items():
  62. if not isinstance(item, dict):
  63. continue
  64. item_url = item.get('url')
  65. if not item_url:
  66. continue
  67. formats.append({
  68. 'url': item_url,
  69. 'format_id': '%s-%s-%s' % (HTML5, kind, quality),
  70. 'filesize': int_or_none(item.get('size')),
  71. 'vcodec': 'none' if kind == 'audio' else None,
  72. 'acodec': 'none' if kind == 'video' else None,
  73. 'quality': quality_key(quality),
  74. 'source_preference': preference_key(HTML5),
  75. })
  76. iphone_url = file_versions.get(IPHONE, {}).get('url')
  77. if iphone_url:
  78. formats.append({
  79. 'url': iphone_url,
  80. 'format_id': IPHONE,
  81. 'source_preference': preference_key(IPHONE),
  82. })
  83. mobile_url = file_versions.get(MOBILE, {}).get('audio_url')
  84. if mobile_url:
  85. formats.append({
  86. 'url': mobile_url,
  87. 'format_id': '%s-audio' % MOBILE,
  88. 'source_preference': preference_key(MOBILE),
  89. })
  90. thumbnail = coub.get('picture')
  91. duration = float_or_none(coub.get('duration'))
  92. timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))
  93. uploader = coub.get('channel', {}).get('title')
  94. uploader_id = coub.get('channel', {}).get('permalink')
  95. view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
  96. like_count = int_or_none(coub.get('likes_count'))
  97. repost_count = int_or_none(coub.get('recoubs_count'))
  98. age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
  99. if age_restricted is not None:
  100. age_limit = 18 if age_restricted is True else 0
  101. else:
  102. age_limit = None
  103. return {
  104. 'id': video_id,
  105. 'title': title,
  106. 'thumbnail': thumbnail,
  107. 'duration': duration,
  108. 'timestamp': timestamp,
  109. 'uploader': uploader,
  110. 'uploader_id': uploader_id,
  111. 'view_count': view_count,
  112. 'like_count': like_count,
  113. 'repost_count': repost_count,
  114. 'age_limit': age_limit,
  115. 'formats': formats,
  116. }