123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272 |
- import random
- from .common import InfoExtractor
- from ..compat import compat_str, compat_urllib_parse_unquote
- from ..utils import ExtractorError, str_or_none, try_get
- class AudiusBaseIE(InfoExtractor):
- _API_BASE = None
- _API_V = '/v1'
- def _get_response_data(self, response):
- if isinstance(response, dict):
- response_data = response.get('data')
- if response_data is not None:
- return response_data
- if len(response) == 1 and 'message' in response:
- raise ExtractorError('API error: %s' % response['message'],
- expected=True)
- raise ExtractorError('Unexpected API response')
- def _select_api_base(self):
- """Selecting one of the currently available API hosts"""
- response = super(AudiusBaseIE, self)._download_json(
- 'https://api.audius.co/', None,
- note='Requesting available API hosts',
- errnote='Unable to request available API hosts')
- hosts = self._get_response_data(response)
- if isinstance(hosts, list):
- self._API_BASE = random.choice(hosts)
- return
- raise ExtractorError('Unable to get available API hosts')
- @staticmethod
- def _prepare_url(url, title):
- """
- Audius removes forward slashes from the uri, but leaves backslashes.
- The problem is that the current version of Chrome replaces backslashes
- in the address bar with a forward slashes, so if you copy the link from
- there and paste it into youtube-dl, you won't be able to download
- anything from this link, since the Audius API won't be able to resolve
- this url
- """
- url = compat_urllib_parse_unquote(url)
- title = compat_urllib_parse_unquote(title)
- if '/' in title or '%2F' in title:
- fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
- return url.replace(title, fixed_title)
- return url
- def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata',
- expected_status=None):
- if self._API_BASE is None:
- self._select_api_base()
- try:
- response = super(AudiusBaseIE, self)._download_json(
- '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
- errnote=errnote, expected_status=expected_status)
- except ExtractorError as exc:
- # some of Audius API hosts may not work as expected and return HTML
- if 'Failed to parse JSON' in compat_str(exc):
- raise ExtractorError('An error occurred while receiving data. Try again',
- expected=True)
- raise exc
- return self._get_response_data(response)
- def _resolve_url(self, url, item_id):
- return self._api_request('/resolve?url=%s' % url, item_id,
- expected_status=404)
- class AudiusIE(AudiusBaseIE):
- _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
- IE_DESC = 'Audius.co'
- _TESTS = [
- {
- # URL from Chrome address bar which replace backslash to forward slash
- 'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
- 'md5': '92c35d3e754d5a0f17eef396b0d33582',
- 'info_dict': {
- 'id': 'xd8gY',
- 'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
- 'ext': 'mp3',
- 'description': 'Description',
- 'duration': 30,
- 'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
- 'artist': 'test',
- 'genre': 'Electronic',
- 'thumbnail': r're:https?://.*\.jpg',
- 'view_count': int,
- 'like_count': int,
- 'repost_count': int,
- }
- },
- {
- # Regular track
- 'url': 'https://audius.co/voltra/radar-103692',
- 'md5': '491898a0a8de39f20c5d6a8a80ab5132',
- 'info_dict': {
- 'id': 'KKdy2',
- 'title': 'RADAR',
- 'ext': 'mp3',
- 'duration': 318,
- 'track': 'RADAR',
- 'artist': 'voltra',
- 'genre': 'Trance',
- 'thumbnail': r're:https?://.*\.jpg',
- 'view_count': int,
- 'like_count': int,
- 'repost_count': int,
- }
- },
- ]
- _ARTWORK_MAP = {
- "150x150": 150,
- "480x480": 480,
- "1000x1000": 1000
- }
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- track_id = try_get(mobj, lambda x: x.group('track_id'))
- if track_id is None:
- title = mobj.group('title')
- # uploader = mobj.group('uploader')
- url = self._prepare_url(url, title)
- track_data = self._resolve_url(url, title)
- else: # API link
- title = None
- # uploader = None
- track_data = self._api_request('/tracks/%s' % track_id, track_id)
- if not isinstance(track_data, dict):
- raise ExtractorError('Unexpected API response')
- track_id = track_data.get('id')
- if track_id is None:
- raise ExtractorError('Unable to get ID of the track')
- artworks_data = track_data.get('artwork')
- thumbnails = []
- if isinstance(artworks_data, dict):
- for quality_key, thumbnail_url in artworks_data.items():
- thumbnail = {
- "url": thumbnail_url
- }
- quality_code = self._ARTWORK_MAP.get(quality_key)
- if quality_code is not None:
- thumbnail['preference'] = quality_code
- thumbnails.append(thumbnail)
- return {
- 'id': track_id,
- 'title': track_data.get('title', title),
- 'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
- 'ext': 'mp3',
- 'description': track_data.get('description'),
- 'duration': track_data.get('duration'),
- 'track': track_data.get('title'),
- 'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
- 'genre': track_data.get('genre'),
- 'thumbnails': thumbnails,
- 'view_count': track_data.get('play_count'),
- 'like_count': track_data.get('favorite_count'),
- 'repost_count': track_data.get('repost_count'),
- }
- class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
- _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
- IE_NAME = 'audius:track'
- IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
- _TESTS = [
- {
- 'url': 'audius:9RWlo',
- 'only_matching': True
- },
- {
- 'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
- 'only_matching': True
- },
- ]
- class AudiusPlaylistIE(AudiusBaseIE):
- _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
- IE_NAME = 'audius:playlist'
- IE_DESC = 'Audius.co playlists'
- _TEST = {
- 'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
- 'info_dict': {
- 'id': 'DNvjN',
- 'title': 'test playlist',
- 'description': 'Test description\n\nlol',
- },
- 'playlist_count': 175,
- }
- def _build_playlist(self, tracks):
- entries = []
- for track in tracks:
- if not isinstance(track, dict):
- raise ExtractorError('Unexpected API response')
- track_id = str_or_none(track.get('id'))
- if not track_id:
- raise ExtractorError('Unable to get track ID from playlist')
- entries.append(self.url_result(
- 'audius:%s' % track_id,
- ie=AudiusTrackIE.ie_key(), video_id=track_id))
- return entries
- def _real_extract(self, url):
- self._select_api_base()
- mobj = self._match_valid_url(url)
- title = mobj.group('title')
- # uploader = mobj.group('uploader')
- url = self._prepare_url(url, title)
- playlist_response = self._resolve_url(url, title)
- if not isinstance(playlist_response, list) or len(playlist_response) != 1:
- raise ExtractorError('Unexpected API response')
- playlist_data = playlist_response[0]
- if not isinstance(playlist_data, dict):
- raise ExtractorError('Unexpected API response')
- playlist_id = playlist_data.get('id')
- if playlist_id is None:
- raise ExtractorError('Unable to get playlist ID')
- playlist_tracks = self._api_request(
- '/playlists/%s/tracks' % playlist_id,
- title, note='Downloading playlist tracks metadata',
- errnote='Unable to download playlist tracks metadata')
- if not isinstance(playlist_tracks, list):
- raise ExtractorError('Unexpected API response')
- entries = self._build_playlist(playlist_tracks)
- return self.playlist_result(entries, playlist_id,
- playlist_data.get('playlist_name', title),
- playlist_data.get('description'))
- class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete IE
- IE_NAME = 'audius:artist'
- IE_DESC = 'Audius.co profile/artist pages'
- _VALID_URL = r'https?://(?:www)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
- _TEST = {
- 'url': 'https://audius.co/pzl/',
- 'info_dict': {
- 'id': 'ezRo7',
- 'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
- 'title': 'pzl',
- },
- 'playlist_count': 24,
- }
- def _real_extract(self, url):
- self._select_api_base()
- profile_id = self._match_id(url)
- try:
- _profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
- except ExtractorError as e:
- raise ExtractorError('Could not download profile info; ' + str(e))
- profile_audius_id = _profile_data[0]['id']
- profile_bio = _profile_data[0].get('bio')
- api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
- return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)
|