regiotv.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
from xml.sax.saxutils import escape

from .common import InfoExtractor
from ..utils import (
    sanitized_Request,
    xpath_text,
    xpath_with_ns,
)


  7. class RegioTVIE(InfoExtractor):
  8. _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)'
  9. _TESTS = [{
  10. 'url': 'http://www.regio-tv.de/video/395808.html',
  11. 'info_dict': {
  12. 'id': '395808',
  13. 'ext': 'mp4',
  14. 'title': 'Wir in Ludwigsburg',
  15. 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!',
  16. }
  17. }, {
  18. 'url': 'http://www.regio-tv.de/video/395808',
  19. 'only_matching': True,
  20. }]
  21. def _real_extract(self, url):
  22. video_id = self._match_id(url)
  23. webpage = self._download_webpage(url, video_id)
  24. key = self._search_regex(
  25. r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key')
  26. title = self._og_search_title(webpage)
  27. SOAP_TEMPLATE = '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><key xsi:type="xsd:string">{1}</key></{0}></soap:Body></soap:Envelope>'
  28. request = sanitized_Request(
  29. 'http://v.telvi.de/',
  30. SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8'))
  31. video_data = self._download_xml(request, video_id, 'Downloading video XML')
  32. NS_MAP = {
  33. 'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
  34. 'soap': 'http://schemas.xmlsoap.org/soap/envelope/',
  35. }
  36. video_url = xpath_text(
  37. video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True)
  38. thumbnail = xpath_text(
  39. video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail')
  40. description = self._og_search_description(
  41. webpage) or self._html_search_meta('description', webpage)
  42. return {
  43. 'id': video_id,
  44. 'url': video_url,
  45. 'title': title,
  46. 'description': description,
  47. 'thumbnail': thumbnail,
  48. }