jwplatform.py 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. import re
  2. from .common import InfoExtractor
  3. from ..utils import unsmuggle_url
  4. class JWPlatformIE(InfoExtractor):
  5. _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
  6. _TESTS = [{
  7. 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
  8. 'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
  9. 'info_dict': {
  10. 'id': 'nPripu9l',
  11. 'ext': 'mov',
  12. 'title': 'Big Buck Bunny Trailer',
  13. 'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
  14. 'upload_date': '20081127',
  15. 'timestamp': 1227796140,
  16. }
  17. }, {
  18. 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
  19. 'only_matching': True,
  20. }]
  21. _WEBPAGE_TESTS = [{
  22. # JWPlatform iframe
  23. 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
  24. 'info_dict': {
  25. 'id': 'AG26UQXM',
  26. 'ext': 'mp4',
  27. 'upload_date': '20160719',
  28. 'timestamp': 1468923808,
  29. 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
  30. 'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
  31. 'description': '',
  32. 'duration': 294.0,
  33. },
  34. }, {
  35. # Player url not surrounded by quotes
  36. 'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
  37. 'info_dict': {
  38. 'id': 'R10NQdhY',
  39. 'title': 'Playgirl',
  40. 'ext': 'mp4',
  41. 'upload_date': '20220624',
  42. 'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
  43. 'timestamp': 1656064800,
  44. 'description': 'BRD 1966, Will Tremper',
  45. 'duration': 5146.0,
  46. },
  47. 'params': {'allowed_extractors': ['generic', 'jwplatform']},
  48. }]
  49. @classmethod
  50. def _extract_embed_urls(cls, url, webpage):
  51. for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
  52. # <input value=URL> is used by hyland.com
  53. # if we find <iframe>, dont look for <input>
  54. ret = re.findall(
  55. r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
  56. webpage)
  57. if ret:
  58. return ret
  59. mobj = re.search(r'<div\b[^>]* data-video-jw-id="([a-zA-Z0-9]{8})"', webpage)
  60. if mobj:
  61. return [f'jwplatform:{mobj.group(1)}']
  62. def _real_extract(self, url):
  63. url, smuggled_data = unsmuggle_url(url, {})
  64. self._initialize_geo_bypass({
  65. 'countries': smuggled_data.get('geo_countries'),
  66. })
  67. video_id = self._match_id(url)
  68. json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
  69. return self._parse_jwplayer_data(json_data, video_id)