vidbit.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. from .common import InfoExtractor
  2. from ..compat import compat_urlparse
  3. from ..utils import (
  4. int_or_none,
  5. js_to_json,
  6. remove_end,
  7. unified_strdate,
  8. )
  9. class VidbitIE(InfoExtractor):
  10. _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)'
  11. _TESTS = [{
  12. 'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2',
  13. 'md5': '1a34b7f14defe3b8fafca9796892924d',
  14. 'info_dict': {
  15. 'id': 'jkL2yDOEq2',
  16. 'ext': 'mp4',
  17. 'title': 'Intro to VidBit',
  18. 'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7',
  19. 'thumbnail': r're:https?://.*\.jpg$',
  20. 'upload_date': '20160618',
  21. 'view_count': int,
  22. 'comment_count': int,
  23. }
  24. }, {
  25. 'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0',
  26. 'only_matching': True,
  27. }]
  28. def _real_extract(self, url):
  29. video_id = self._match_id(url)
  30. webpage = self._download_webpage(
  31. compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id)
  32. video_url, title = [None] * 2
  33. config = self._parse_json(self._search_regex(
  34. r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'),
  35. video_id, transform_source=js_to_json)
  36. if config:
  37. if config.get('file'):
  38. video_url = compat_urlparse.urljoin(url, config['file'])
  39. title = config.get('title')
  40. if not video_url:
  41. video_url = compat_urlparse.urljoin(url, self._search_regex(
  42. r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
  43. webpage, 'video URL', group='url'))
  44. if not title:
  45. title = remove_end(
  46. self._html_search_regex(
  47. (r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'),
  48. webpage, 'title', default=None) or self._og_search_title(webpage),
  49. ' - VidBit')
  50. description = self._html_search_meta(
  51. ('description', 'og:description', 'twitter:description'),
  52. webpage, 'description')
  53. upload_date = unified_strdate(self._html_search_meta(
  54. 'datePublished', webpage, 'upload date'))
  55. view_count = int_or_none(self._search_regex(
  56. r'<strong>(\d+)</strong> views',
  57. webpage, 'view count', fatal=False))
  58. comment_count = int_or_none(self._search_regex(
  59. r'id=["\']cmt_num["\'][^>]*>\((\d+)\)',
  60. webpage, 'comment count', fatal=False))
  61. return {
  62. 'id': video_id,
  63. 'url': video_url,
  64. 'title': title,
  65. 'description': description,
  66. 'thumbnail': self._og_search_thumbnail(webpage),
  67. 'upload_date': upload_date,
  68. 'view_count': view_count,
  69. 'comment_count': comment_count,
  70. }