hotnewhiphop.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. from .common import InfoExtractor
  2. from ..compat import compat_b64decode
  3. from ..utils import (
  4. ExtractorError,
  5. HEADRequest,
  6. sanitized_Request,
  7. urlencode_postdata,
  8. )
  class HotNewHipHopIE(InfoExtractor):
      """Information extractor for media pages on hotnewhiphop.com.

      The numeric-looking token between the last two dots of the page URL
      (``...-song.<id>.html``) is captured as the video id.
      """

      _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
      _TEST = {
          'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
          'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
          'info_dict': {
              'id': '1435540',
              'ext': 'mp3',
              'title': 'Freddie Gibbs - Lay It Down'
          }
      }

      def _real_extract(self, url):
          """Resolve the media URL behind a hotnewhiphop.com page.

          The page is searched for a ``data-path`` attribute. When it is
          absent, the page's ``contentUrl`` value is handed to
          ``self.url_result(..., ie='Youtube')`` instead. Otherwise the
          attribute value is base64-decoded and followed with a HEAD request
          to find the final media location.

          Returns:
              Either the ``url_result`` delegation described above, or a dict
              with the keys ``id``, ``url``, ``title`` and ``thumbnail``.

          Raises:
              ExtractorError: if the media-key response has no ``mediaKey``
                  entry, or if the resolved URL still ends in ``.html``.
          """
          video_id = self._match_id(url)
          webpage = self._download_webpage(url, video_id)

          encoded_path = self._search_regex(
              r'data-path="(.*?)"', webpage, 'video URL', default=None)
          if encoded_path is None:
              # No data-path attribute on the page: delegate the page's
              # contentUrl to the Youtube extractor.
              content_url = self._search_regex(
                  r'"contentUrl" content="(.*?)"', webpage, 'content URL')
              return self.url_result(content_url, ie='Youtube')

          key_request = sanitized_Request(
              'http://www.hotnewhiphop.com/ajax/media/getActions/',
              data=urlencode_postdata([
                  ('mediaType', 's'),
                  ('mediaId', video_id),
              ]))
          key_request.add_header(
              'Content-Type', 'application/x-www-form-urlencoded')
          key_response = self._download_json(
              key_request, video_id,
              note='Requesting media key',
              errnote='Could not download media key')
          # Only the presence of the key is verified; its value is not used
          # anywhere below.
          if not ('mediaKey' in key_response):
              raise ExtractorError('Did not get a media key')

          # The data-path value is a base64-encoded URL; a HEAD request is
          # enough to learn where it ends up without downloading the body.
          head_response = self._request_webpage(
              HEADRequest(compat_b64decode(encoded_path).decode('utf-8')),
              video_id,
              note='Resolving final URL',
              errnote='Could not resolve final URL')
          final_url = head_response.geturl()
          if final_url.endswith('.html'):
              # Ending up on an HTML page is treated as a failed redirect.
              raise ExtractorError('Redirect failed')

          return {
              'id': video_id,
              'url': final_url,
              'title': self._og_search_title(webpage).strip(),
              'thumbnail': self._og_search_thumbnail(webpage),
          }