12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817 |
- import functools
- import itertools
- import math
- import operator
- import re
- import urllib.request
- from .common import InfoExtractor
- from .openload import PhantomJSwrapper
- from ..compat import compat_HTTPError, compat_str
- from ..utils import (
- NO_DEFAULT,
- ExtractorError,
- clean_html,
- determine_ext,
- format_field,
- int_or_none,
- merge_dicts,
- orderedSet,
- remove_quotes,
- remove_start,
- str_to_int,
- update_url_query,
- url_or_none,
- urlencode_postdata,
- )
class PornHubBaseIE(InfoExtractor):
    """Common base of the PornHub extractors.

    Wraps webpage downloads so that a JavaScript challenge page is run
    through PhantomJS and the download retried, and implements the optional
    account login shared by all subclasses.
    """

    _NETRC_MACHINE = 'pornhub'
    # NOTE: the named group ``host`` is only populated by the clearnet
    # alternative; it is None when the .onion alternative matches.
    _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'

    def _download_webpage_handle(self, *args, **kwargs):
        """Download a webpage, retrying once after a JavaScript challenge.

        All arguments are forwarded unchanged to the parent implementation.

        Returns whatever the parent returns: a ``(webpage, urlh)`` pair, or
        its falsy result when a non-fatal download failed.
        """
        def dl(*args, **kwargs):
            # Zero-argument super() is unavailable inside a nested function.
            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)

        ret = dl(*args, **kwargs)

        if not ret:
            return ret

        webpage, urlh = ret

        # Pages matching any of these carry a script that sets the RNKEY
        # cookie and reloads instead of the real content.
        if any(re.search(p, webpage) for p in (
                r'<body\b[^>]+\bonload=["\']go\(\)',
                r'document\.cookie\s*=\s*["\']RNKEY=',
                r'document\.location\.reload\(true\)')):
            url_or_request = args[0]
            url = (url_or_request.get_full_url()
                   if isinstance(url_or_request, urllib.request.Request)
                   else url_or_request)
            phantom = PhantomJSwrapper(self, required_version='2.0')
            phantom.get(url, html=webpage)
            # Return the retry result as is. Unpacking it here would raise
            # TypeError when the retry fails non-fatally and yields a falsy
            # value, the very case guarded against for the first attempt.
            return dl(*args, **kwargs)

        return webpage, urlh

    def _real_initialize(self):
        """Reset the login state of this extractor instance."""
        self._logged_in = False

    def _login(self, host):
        """Log in to *host* once, if credentials are configured for it.

        Does nothing when already logged in or when no username is available
        for the site.

        Raises ExtractorError when the site rejects the login.
        """
        if self._logged_in:
            return

        site = host.split('.')[0]

        # Both sites pornhub and pornhubpremium have separate accounts
        # so there should be an option to provide credentials for both.
        # At the same time some videos are available under the same video id
        # on both sites so that we have to identify them as the same video.
        # For that purpose we have to keep both in the same extractor
        # but under different netrc machines.
        username, password = self._get_login_info(netrc_machine=site)
        if username is None:
            return

        login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
        login_page = self._download_webpage(
            login_url, None, 'Downloading %s login page' % site)

        def is_logged(webpage):
            # A sign-out control is only rendered for authenticated sessions
            # (presumably; this mirrors what the patterns look for).
            return any(re.search(p, webpage) for p in (
                r'class=["\']signOut',
                r'>Sign\s+[Oo]ut\s*<'))

        if is_logged(login_page):
            self._logged_in = True
            return

        # Start from the hidden inputs of the login page so that any tokens
        # it embeds are posted back together with the credentials.
        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'username': username,
            'password': password,
        })

        response = self._download_json(
            'https://www.%s/front/authenticate' % host, None,
            'Logging in to %s' % site,
            data=urlencode_postdata(login_form),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': login_url,
                'X-Requested-With': 'XMLHttpRequest',
            })

        if response.get('success') == '1':
            self._logged_in = True
            return

        message = response.get('message')
        if message is not None:
            raise ExtractorError(
                'Unable to login: %s' % message, expected=True)

        raise ExtractorError('Unable to log in')
- class PornHubIE(PornHubBaseIE):
- IE_DESC = 'PornHub and Thumbzilla'
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:[^/]+\.)?
- %s
- /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
- (?:www\.)?thumbzilla\.com/video/
- )
- (?P<id>[\da-z]+)
- ''' % PornHubBaseIE._PORNHUB_HOST_RE
- _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
- _TESTS = [{
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
- 'md5': 'a6391306d050e4547f62b3f485dd9ba9',
- 'info_dict': {
- 'id': '648719015',
- 'ext': 'mp4',
- 'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
- 'uploader': 'Babes',
- 'upload_date': '20130628',
- 'timestamp': 1372447216,
- 'duration': 361,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- 'tags': list,
- 'categories': list,
- 'cast': list,
- },
- }, {
- # non-ASCII title
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
- 'info_dict': {
- 'id': '1331683002',
- 'ext': 'mp4',
- 'title': '重庆婷婷女王足交',
- 'upload_date': '20150213',
- 'timestamp': 1423804862,
- 'duration': 1753,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- 'tags': list,
- 'categories': list,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
- }, {
- # subtitles
- 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
- 'info_dict': {
- 'id': 'ph5af5fef7c2aa7',
- 'ext': 'mp4',
- 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
- 'uploader': 'BFFs',
- 'duration': 622,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- 'tags': list,
- 'categories': list,
- 'subtitles': {
- 'en': [{
- "ext": 'srt'
- }]
- },
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video has been disabled',
- }, {
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
- 'info_dict': {
- 'id': 'ph601dc30bae19a',
- 'uploader': 'Projekt Melody',
- 'uploader_id': 'projekt-melody',
- 'upload_date': '20210205',
- 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
- 'thumbnail': r're:https?://.+',
- },
- }, {
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
- 'only_matching': True,
- }, {
- # removed at the request of cam4.com
- 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
- 'only_matching': True,
- }, {
- # removed at the request of the copyright owner
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
- 'only_matching': True,
- }, {
- # removed by uploader
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
- 'only_matching': True,
- }, {
- # private video
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
- 'only_matching': True,
- }, {
- 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
- 'only_matching': True,
- }, {
- 'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
- 'only_matching': True,
- }, {
- # Some videos are available with the same id on both premium
- # and non-premium sites (e.g. this and the following test)
- 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
- 'only_matching': True,
- }, {
- # geo restricted
- 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
- 'only_matching': True,
- }, {
- 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156',
- 'only_matching': True,
- }]
def _extract_count(self, pattern, webpage, name):
    """Search *webpage* for *pattern* and pass the match to str_to_int.

    *name* only labels the field ('<name> count'). The search is non-fatal,
    so str_to_int receives None when nothing matches.
    """
    raw_count = self._search_regex(
        pattern, webpage, '%s count' % name, default=None)
    return str_to_int(raw_count)
def _real_extract(self, url):
    """Extract formats and metadata of a single video.

    Candidate media URLs are collected, in order, from the ``flashvars_*``
    JSON, from obfuscated JavaScript variables on the 'pc' page, from the
    ``mediastring`` variable of the 'tv' page, and from download buttons.

    Raises an expected ExtractorError when the page shows an error message
    or a locked player, and reports geo restriction through
    raise_geo_restricted().
    """
    mobj = self._match_valid_url(url)
    # The .onion alternative leaves the `host` group unset.
    host = mobj.group('host') or 'pornhub.com'
    video_id = mobj.group('id')

    self._login(host)

    self._set_cookie(host, 'age_verified', '1')

    def dl_webpage(platform):
        # The page is requested with a `platform` cookie ('pc' or 'tv').
        self._set_cookie(host, 'platform', platform)
        return self._download_webpage(
            'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
            video_id, 'Downloading %s webpage' % platform)

    webpage = dl_webpage('pc')

    error_msg = self._html_search_regex(
        (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
         r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
        webpage, 'error message', default=None, group='error')
    if error_msg:
        error_msg = re.sub(r'\s+', ' ', error_msg)
        raise ExtractorError(
            'PornHub said: %s' % error_msg,
            expected=True, video_id=video_id)

    if any(re.search(p, webpage) for p in (
            r'class=["\']geoBlocked["\']',
            r'>\s*This content is unavailable in your country')):
        self.raise_geo_restricted()

    # video_title from flashvars contains whitespace instead of non-ASCII (see
    # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
    # on that anymore.
    title = self._html_search_meta(
        'twitter:title', webpage, default=None) or self._html_search_regex(
        (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
         r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
         r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
        webpage, 'title', group='title')

    # (url, height) pairs in discovery order; the set de-duplicates URLs.
    video_urls = []
    video_urls_set = set()
    subtitles = {}

    flashvars = self._parse_json(
        self._search_regex(
            r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
        video_id)
    if flashvars:
        subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
        if subtitle_url:
            subtitles.setdefault('en', []).append({
                'url': subtitle_url,
                'ext': 'srt',
            })
        thumbnail = flashvars.get('image_url')
        duration = int_or_none(flashvars.get('video_duration'))
        media_definitions = flashvars.get('mediaDefinitions')
        if isinstance(media_definitions, list):
            for definition in media_definitions:
                if not isinstance(definition, dict):
                    continue
                video_url = definition.get('videoUrl')
                if not video_url or not isinstance(video_url, compat_str):
                    continue
                if video_url in video_urls_set:
                    continue
                video_urls_set.add(video_url)
                video_urls.append(
                    (video_url, int_or_none(definition.get('quality'))))
    else:
        thumbnail, duration = [None] * 2

    def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
        """Evaluate the `var a = "x" + b;` style assignments matched by *pattern*.

        Returns a dict mapping variable name to its concatenated string value.
        """
        assignments = self._search_regex(
            pattern, webpage, 'encoded url', default=default)
        if not assignments:
            return {}

        js_vars = {}

        def parse_js_value(inp):
            # Drop /* ... */ comments interleaved with the operands.
            inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
            if '+' in inp:
                inps = inp.split('+')
                return functools.reduce(
                    operator.concat, map(parse_js_value, inps))
            inp = inp.strip()
            if inp in js_vars:
                return js_vars[inp]
            return remove_quotes(inp)

        for assn in assignments.split(';'):
            assn = assn.strip()
            if not assn:
                continue
            assn = re.sub(r'var\s+', '', assn)
            vname, sep, value = assn.partition('=')
            if not sep:
                # A bare declaration or stray fragment carries no value.
                continue
            # Names are looked up stripped in parse_js_value, so they must
            # be stored stripped as well.
            js_vars[vname.strip()] = parse_js_value(value)
        return js_vars

    def add_video_url(video_url):
        v_url = url_or_none(video_url)
        if not v_url:
            return
        if v_url in video_urls_set:
            return
        video_urls.append((v_url, None))
        video_urls_set.add(v_url)

    def parse_quality_items(quality_items):
        q_items = self._parse_json(quality_items, video_id, fatal=False)
        if not isinstance(q_items, list):
            return
        for item in q_items:
            if isinstance(item, dict):
                add_video_url(item.get('url'))

    if not video_urls:
        FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
        js_vars = extract_js_vars(
            webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
            default=None)
        if js_vars:
            for key, format_url in js_vars.items():
                # 'qualityItems' must be tested first: it also starts with
                # the shorter 'quality' prefix.
                if key.startswith(FORMAT_PREFIXES[-1]):
                    parse_quality_items(format_url)
                elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
                    add_video_url(format_url)
        if not video_urls and re.search(
                r'<[^>]+\bid=["\']lockedPlayer', webpage):
            raise ExtractorError(
                'Video %s is locked' % video_id, expected=True)

    if not video_urls:
        js_vars = extract_js_vars(
            dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
        # .get(): the pattern only guarantees that the text 'mediastring'
        # occurs, not that a variable of that exact name was parsed.
        add_video_url(js_vars.get('mediastring'))

    for mobj in re.finditer(
            r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage):
        video_url = mobj.group('url')
        if video_url not in video_urls_set:
            video_urls.append((video_url, None))
            video_urls_set.add(video_url)

    upload_date = None
    formats = []

    def add_format(format_url, height=None):
        ext = determine_ext(format_url)
        if ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                format_url, video_id, mpd_id='dash', fatal=False))
            return
        if ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))
            return
        if not height:
            # Fall back to a '<height>P_<bitrate>K' component of the URL.
            height = int_or_none(self._search_regex(
                r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
                default=None))
        formats.append({
            'url': format_url,
            'format_id': format_field(height, None, '%dp'),
            'height': height,
        })

    for video_url, height in video_urls:
        if not upload_date:
            # Media URLs may embed the upload date as '/YYYYMM/DD/'.
            upload_date = self._search_regex(
                r'/(\d{6}/\d{2})/', video_url, 'upload date', default=None)
            if upload_date:
                upload_date = upload_date.replace('/', '')
        if '/video/get_media' in video_url:
            # This endpoint returns a JSON list of further media entries.
            medias = self._download_json(video_url, video_id, fatal=False)
            if isinstance(medias, list):
                for media in medias:
                    if not isinstance(media, dict):
                        continue
                    video_url = url_or_none(media.get('videoUrl'))
                    if not video_url:
                        continue
                    height = int_or_none(media.get('quality'))
                    add_format(video_url, height)
            continue
        # NOTE(review): the height collected with the URL is not forwarded
        # here; kept as is to preserve existing format ids.
        add_format(video_url)

    # `or {}` keeps the .get() calls below safe should the non-fatal search
    # ever yield None.
    model_profile = self._search_json(
        r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False) or {}
    video_uploader = self._html_search_regex(
        r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
        webpage, 'uploader', default=None) or model_profile.get('username')

    def extract_vote_count(kind, name):
        return self._extract_count(
            (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
             r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
            webpage, name)

    view_count = self._extract_count(
        r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
    like_count = extract_vote_count('Up', 'like')
    dislike_count = extract_vote_count('Down', 'dislike')
    comment_count = self._extract_count(
        r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

    def extract_list(meta_key):
        # Returns None (not an empty list) when the wrapper div is absent.
        div = self._search_regex(
            r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
            % meta_key, webpage, meta_key, default=None)
        if div:
            return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]

    info = self._search_json_ld(webpage, video_id, default={})
    # description provided in JSON-LD is irrelevant
    info['description'] = None

    return merge_dicts({
        'id': video_id,
        'uploader': video_uploader,
        'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'),
        'upload_date': upload_date,
        'title': title,
        'thumbnail': thumbnail,
        'duration': duration,
        'view_count': view_count,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'comment_count': comment_count,
        'formats': formats,
        'age_limit': 18,
        'tags': extract_list('tags'),
        'categories': extract_list('categories'),
        'cast': extract_list('pornstars'),
        'subtitles': subtitles,
    }, info)
class PornHubPlaylistBaseIE(PornHubBaseIE):
    """Helpers shared by the extractors that produce lists of videos."""

    def _extract_page(self, url):
        """Return the ``page=`` number found in *url*, or None if absent."""
        page = self._search_regex(
            r'\bpage=(\d+)', url, 'page', default=None)
        return int_or_none(page)

    def _extract_entries(self, webpage, host):
        """Return url results for the distinct video links in *webpage*.

        Links are resolved against *host* and delegated to PornHubIE.
        """
        # Only process container div with main playlist content skipping
        # drop-down menu that uses similar pattern for videos (see
        # https://github.com/ytdl-org/youtube-dl/issues/11594).
        container = self._search_regex(
            r'(?s)(<div[^>]+class=["\']container.+)', webpage,
            'container', default=webpage)

        links = re.findall(
            r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
            container)

        entries = []
        for video_url, title in orderedSet(links):
            entries.append(self.url_result(
                'http://www.%s/%s' % (host, video_url),
                PornHubIE.ie_key(), video_title=title))
        return entries
class PornHubUserIE(PornHubPlaylistBaseIE):
    """Profile pages of users, channels, models and pornstars.

    Extraction is delegated to the paged listing of the profile's
    ``/videos`` page.
    """

    _VALID_URL = (
        r'(?P<url>https?://(?:[^/]+\.)?%s'
        r'/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))'
        r'(?:[?#&]|/(?!videos)|$)'
    ) % PornHubBaseIE._PORNHUB_HOST_RE
    _TESTS = [{
        'url': 'https://www.pornhub.com/model/zoe_ph',
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious',
        'info_dict': {
            'id': 'liz-vicious',
        },
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/users/russianveet69',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/channels/povd',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
        'only_matching': True,
    }, {
        # Unavailable via /videos page, but available with direct pagination
        # on pornstar page (see [1]), requires premium
        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
        'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
        'only_matching': True,
    }, {
        # Same as before, multi page
        'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
        'only_matching': True,
    }, {
        'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect a profile URL to its ``/videos`` listing, keeping ``page=``."""
        match = self._match_valid_url(url)
        target = '%s/videos' % match.group('url')

        # The `url` group stops before the query string, so an explicit page
        # number has to be carried over by hand.
        page_num = self._extract_page(url)
        if page_num:
            target = update_url_query(target, {'page': page_num})

        return self.url_result(
            target, ie=PornHubPagedVideoListIE.ie_key(),
            video_id=match.group('id'))
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
    """Base for listings paginated through the ``page`` query parameter."""

    @staticmethod
    def _has_more(webpage):
        """Return True if *webpage* contains a marker of a following page."""
        return re.search(
            r'''(?x)
                <li[^>]+\bclass=["\']page_next|
                <link[^>]+\brel=["\']next|
                <button[^>]+\bid=["\']moreDataBtn
            ''', webpage) is not None

    def _entries(self, url, host, item_id):
        """Yield the entries of the listing at *url*, page by page.

        When *url* carries an explicit ``page=`` only that page is fetched.
        Otherwise pages are fetched from 1 until one is empty, has no
        next-page marker, or a later page answers with HTTP 404.
        """
        page = self._extract_page(url)

        VIDEOS = '/videos'

        def download_page(base_url, num, fallback=False):
            note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
            return self._download_webpage(
                base_url, item_id, note, query={'page': num})

        def is_404(e):
            return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404

        base_url = url
        has_page = page is not None
        first_page = page if has_page else 1
        for page_num in (first_page, ) if has_page else itertools.count(first_page):
            try:
                try:
                    webpage = download_page(base_url, page_num)
                except ExtractorError as e:
                    # Some sources may not be available via /videos page,
                    # trying to fallback to main page pagination (see [1])
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
                    if is_404(e) and page_num == first_page and VIDEOS in base_url:
                        base_url = base_url.replace(VIDEOS, '')
                        webpage = download_page(base_url, page_num, fallback=True)
                    else:
                        raise
            except ExtractorError as e:
                # A 404 past the first page just means the listing ended.
                if is_404(e) and page_num != first_page:
                    break
                raise
            page_entries = self._extract_entries(webpage, host)
            if not page_entries:
                break
            for e in page_entries:
                yield e
            if not self._has_more(webpage):
                break

    def _real_extract(self, url):
        """Log in if possible and return the playlist for *url*."""
        mobj = self._match_valid_url(url)
        # The .onion alternative of the host pattern leaves `host` unset.
        # Without the fallback, _login() fails on None and entry URLs are
        # built as 'www.None'. PornHubIE uses the same fallback.
        host = mobj.group('host') or 'pornhub.com'
        item_id = mobj.group('id')

        self._login(host)

        return self.playlist_result(self._entries(url, host, item_id), item_id)
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
    """Catch-all for paged video listings not claimed by a more specific extractor."""

    _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
    _TESTS = [{
        'url': 'https://www.pornhub.com/model/zoe_ph/videos',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/users/rushandlia/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
        'info_dict': {
            'id': 'pornstar/jenny-blighe/videos',
        },
        'playlist_mincount': 149,
    }, {
        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
        'info_dict': {
            'id': 'pornstar/jenny-blighe/videos',
        },
        'playlist_mincount': 40,
    }, {
        # default sorting as Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos',
        'info_dict': {
            'id': 'channels/povd/videos',
        },
        'playlist_mincount': 293,
    }, {
        # Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
        'only_matching': True,
    }, {
        # Most Recent Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
        'only_matching': True,
    }, {
        # Most Viewed Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
        'only_matching': True,
    }, {
        # Most Viewed Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
        'only_matching': True,
    }, {
        # Top Rated Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
        'only_matching': True,
    }, {
        # Longest Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
        'only_matching': True,
    }, {
        # Newest Videos
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video?page=3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video/search?search=123',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/categories/teen',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/categories/teen?page=3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/hd',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/hd?page=3',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/described-video',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/described-video?page=2',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
        'only_matching': True,
    }, {
        'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match *url* only if none of the more specific extractors does."""
        # _VALID_URL is broad enough to overlap with these extractors, so
        # they are given precedence, checked in this order.
        more_specific = (PornHubIE, PornHubUserIE, PornHubUserVideosUploadIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super(PornHubPagedVideoListIE, cls).suitable(url)
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
    """The ``/videos/upload`` listing of a user, channel, model or pornstar.

    Defines only the URL pattern; extraction is inherited from the paged
    playlist base class.
    """

    _VALID_URL = (
        r'(?P<url>https?://(?:[^/]+\.)?%s'
        r'/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
    ) % PornHubBaseIE._PORNHUB_HOST_RE
    _TESTS = [{
        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
        'info_dict': {
            'id': 'jenny-blighe',
        },
        'playlist_mincount': 129,
    }, {
        'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
        'only_matching': True,
    }, {
        'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload',
        'only_matching': True,
    }]
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
    """User-curated playlists (``/playlist/<id>``), loaded in chunks."""

    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE
    _TESTS = [{
        'url': 'https://www.pornhub.com/playlist/44121572',
        'info_dict': {
            'id': '44121572',
        },
        'playlist_count': 77,
    }, {
        'url': 'https://www.pornhub.com/playlist/4667351',
        'only_matching': True,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351',
        'only_matching': True,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351?page=2',
        'only_matching': True,
    }]

    def _entries(self, url, host, item_id):
        """Yield the playlist entries.

        The first page comes from *url* itself; further pages are requested
        from the ``viewChunked`` endpoint using the playlist id and token
        embedded in the first page.
        """
        webpage = self._download_webpage(url, item_id, 'Downloading page 1')
        playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id')
        video_count = int_or_none(
            self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count'))
        token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token')
        # The arithmetic assumes 36 items on the first page and 40 per
        # following chunk (presumably the site's page sizes).
        page_count = math.ceil((video_count - 36) / 40.) + 1
        page_entries = self._extract_entries(webpage, host)

        def download_page(page_num):
            note = 'Downloading page {}'.format(page_num)
            page_url = 'https://www.{}/playlist/viewChunked'.format(host)
            return self._download_webpage(page_url, item_id, note, query={
                'id': playlist_id,
                'page': page_num,
                'token': token,
            })

        for page_num in range(1, page_count + 1):
            # Page 1 was already downloaded and parsed above.
            if page_num > 1:
                webpage = download_page(page_num)
                page_entries = self._extract_entries(webpage, host)
            if not page_entries:
                break
            for e in page_entries:
                yield e

    def _real_extract(self, url):
        """Log in if possible and return the playlist for *url*."""
        mobj = self._match_valid_url(url)
        # The .onion alternative of the host pattern leaves `host` unset.
        # Without the fallback, _login() fails on None and both the chunk
        # URL and the entry URLs are built as 'www.None'. PornHubIE uses the
        # same fallback.
        host = mobj.group('host') or 'pornhub.com'
        item_id = mobj.group('id')

        self._login(host)

        # Use the `url` group so that any query string (e.g. ?page=2) is
        # dropped and enumeration always starts from the first page.
        return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)
|