audius.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. import random
  2. from .common import InfoExtractor
  3. from ..compat import compat_str, compat_urllib_parse_unquote
  4. from ..utils import ExtractorError, str_or_none, try_get
  5. class AudiusBaseIE(InfoExtractor):
  6. _API_BASE = None
  7. _API_V = '/v1'
  8. def _get_response_data(self, response):
  9. if isinstance(response, dict):
  10. response_data = response.get('data')
  11. if response_data is not None:
  12. return response_data
  13. if len(response) == 1 and 'message' in response:
  14. raise ExtractorError('API error: %s' % response['message'],
  15. expected=True)
  16. raise ExtractorError('Unexpected API response')
  17. def _select_api_base(self):
  18. """Selecting one of the currently available API hosts"""
  19. response = super(AudiusBaseIE, self)._download_json(
  20. 'https://api.audius.co/', None,
  21. note='Requesting available API hosts',
  22. errnote='Unable to request available API hosts')
  23. hosts = self._get_response_data(response)
  24. if isinstance(hosts, list):
  25. self._API_BASE = random.choice(hosts)
  26. return
  27. raise ExtractorError('Unable to get available API hosts')
  28. @staticmethod
  29. def _prepare_url(url, title):
  30. """
  31. Audius removes forward slashes from the uri, but leaves backslashes.
  32. The problem is that the current version of Chrome replaces backslashes
  33. in the address bar with a forward slashes, so if you copy the link from
  34. there and paste it into youtube-dl, you won't be able to download
  35. anything from this link, since the Audius API won't be able to resolve
  36. this url
  37. """
  38. url = compat_urllib_parse_unquote(url)
  39. title = compat_urllib_parse_unquote(title)
  40. if '/' in title or '%2F' in title:
  41. fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
  42. return url.replace(title, fixed_title)
  43. return url
  44. def _api_request(self, path, item_id=None, note='Downloading JSON metadata',
  45. errnote='Unable to download JSON metadata',
  46. expected_status=None):
  47. if self._API_BASE is None:
  48. self._select_api_base()
  49. try:
  50. response = super(AudiusBaseIE, self)._download_json(
  51. '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
  52. errnote=errnote, expected_status=expected_status)
  53. except ExtractorError as exc:
  54. # some of Audius API hosts may not work as expected and return HTML
  55. if 'Failed to parse JSON' in compat_str(exc):
  56. raise ExtractorError('An error occurred while receiving data. Try again',
  57. expected=True)
  58. raise exc
  59. return self._get_response_data(response)
  60. def _resolve_url(self, url, item_id):
  61. return self._api_request('/resolve?url=%s' % url, item_id,
  62. expected_status=404)
  63. class AudiusIE(AudiusBaseIE):
  64. _VALID_URL = r'''(?x)https?://(?:www\.)?(?:audius\.co/(?P<uploader>[\w\d-]+)(?!/album|/playlist)/(?P<title>\S+))'''
  65. IE_DESC = 'Audius.co'
  66. _TESTS = [
  67. {
  68. # URL from Chrome address bar which replace backslash to forward slash
  69. 'url': 'https://audius.co/test_acc/t%D0%B5%D0%B5%D0%B5est-1.%5E_%7B%7D/%22%3C%3E.%E2%84%96~%60-198631',
  70. 'md5': '92c35d3e754d5a0f17eef396b0d33582',
  71. 'info_dict': {
  72. 'id': 'xd8gY',
  73. 'title': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
  74. 'ext': 'mp3',
  75. 'description': 'Description',
  76. 'duration': 30,
  77. 'track': '''Tеееest/ 1.!@#$%^&*()_+=[]{};'\\\":<>,.?/№~`''',
  78. 'artist': 'test',
  79. 'genre': 'Electronic',
  80. 'thumbnail': r're:https?://.*\.jpg',
  81. 'view_count': int,
  82. 'like_count': int,
  83. 'repost_count': int,
  84. }
  85. },
  86. {
  87. # Regular track
  88. 'url': 'https://audius.co/voltra/radar-103692',
  89. 'md5': '491898a0a8de39f20c5d6a8a80ab5132',
  90. 'info_dict': {
  91. 'id': 'KKdy2',
  92. 'title': 'RADAR',
  93. 'ext': 'mp3',
  94. 'duration': 318,
  95. 'track': 'RADAR',
  96. 'artist': 'voltra',
  97. 'genre': 'Trance',
  98. 'thumbnail': r're:https?://.*\.jpg',
  99. 'view_count': int,
  100. 'like_count': int,
  101. 'repost_count': int,
  102. }
  103. },
  104. ]
  105. _ARTWORK_MAP = {
  106. "150x150": 150,
  107. "480x480": 480,
  108. "1000x1000": 1000
  109. }
  110. def _real_extract(self, url):
  111. mobj = self._match_valid_url(url)
  112. track_id = try_get(mobj, lambda x: x.group('track_id'))
  113. if track_id is None:
  114. title = mobj.group('title')
  115. # uploader = mobj.group('uploader')
  116. url = self._prepare_url(url, title)
  117. track_data = self._resolve_url(url, title)
  118. else: # API link
  119. title = None
  120. # uploader = None
  121. track_data = self._api_request('/tracks/%s' % track_id, track_id)
  122. if not isinstance(track_data, dict):
  123. raise ExtractorError('Unexpected API response')
  124. track_id = track_data.get('id')
  125. if track_id is None:
  126. raise ExtractorError('Unable to get ID of the track')
  127. artworks_data = track_data.get('artwork')
  128. thumbnails = []
  129. if isinstance(artworks_data, dict):
  130. for quality_key, thumbnail_url in artworks_data.items():
  131. thumbnail = {
  132. "url": thumbnail_url
  133. }
  134. quality_code = self._ARTWORK_MAP.get(quality_key)
  135. if quality_code is not None:
  136. thumbnail['preference'] = quality_code
  137. thumbnails.append(thumbnail)
  138. return {
  139. 'id': track_id,
  140. 'title': track_data.get('title', title),
  141. 'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
  142. 'ext': 'mp3',
  143. 'description': track_data.get('description'),
  144. 'duration': track_data.get('duration'),
  145. 'track': track_data.get('title'),
  146. 'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
  147. 'genre': track_data.get('genre'),
  148. 'thumbnails': thumbnails,
  149. 'view_count': track_data.get('play_count'),
  150. 'like_count': track_data.get('favorite_count'),
  151. 'repost_count': track_data.get('repost_count'),
  152. }
  153. class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
  154. _VALID_URL = r'''(?x)(?:audius:)(?:https?://(?:www\.)?.+/v1/tracks/)?(?P<track_id>\w+)'''
  155. IE_NAME = 'audius:track'
  156. IE_DESC = 'Audius track ID or API link. Prepend with "audius:"'
  157. _TESTS = [
  158. {
  159. 'url': 'audius:9RWlo',
  160. 'only_matching': True
  161. },
  162. {
  163. 'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
  164. 'only_matching': True
  165. },
  166. ]
  167. class AudiusPlaylistIE(AudiusBaseIE):
  168. _VALID_URL = r'https?://(?:www\.)?audius\.co/(?P<uploader>[\w\d-]+)/(?:album|playlist)/(?P<title>\S+)'
  169. IE_NAME = 'audius:playlist'
  170. IE_DESC = 'Audius.co playlists'
  171. _TEST = {
  172. 'url': 'https://audius.co/test_acc/playlist/test-playlist-22910',
  173. 'info_dict': {
  174. 'id': 'DNvjN',
  175. 'title': 'test playlist',
  176. 'description': 'Test description\n\nlol',
  177. },
  178. 'playlist_count': 175,
  179. }
  180. def _build_playlist(self, tracks):
  181. entries = []
  182. for track in tracks:
  183. if not isinstance(track, dict):
  184. raise ExtractorError('Unexpected API response')
  185. track_id = str_or_none(track.get('id'))
  186. if not track_id:
  187. raise ExtractorError('Unable to get track ID from playlist')
  188. entries.append(self.url_result(
  189. 'audius:%s' % track_id,
  190. ie=AudiusTrackIE.ie_key(), video_id=track_id))
  191. return entries
  192. def _real_extract(self, url):
  193. self._select_api_base()
  194. mobj = self._match_valid_url(url)
  195. title = mobj.group('title')
  196. # uploader = mobj.group('uploader')
  197. url = self._prepare_url(url, title)
  198. playlist_response = self._resolve_url(url, title)
  199. if not isinstance(playlist_response, list) or len(playlist_response) != 1:
  200. raise ExtractorError('Unexpected API response')
  201. playlist_data = playlist_response[0]
  202. if not isinstance(playlist_data, dict):
  203. raise ExtractorError('Unexpected API response')
  204. playlist_id = playlist_data.get('id')
  205. if playlist_id is None:
  206. raise ExtractorError('Unable to get playlist ID')
  207. playlist_tracks = self._api_request(
  208. '/playlists/%s/tracks' % playlist_id,
  209. title, note='Downloading playlist tracks metadata',
  210. errnote='Unable to download playlist tracks metadata')
  211. if not isinstance(playlist_tracks, list):
  212. raise ExtractorError('Unexpected API response')
  213. entries = self._build_playlist(playlist_tracks)
  214. return self.playlist_result(entries, playlist_id,
  215. playlist_data.get('playlist_name', title),
  216. playlist_data.get('description'))
  217. class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete IE
  218. IE_NAME = 'audius:artist'
  219. IE_DESC = 'Audius.co profile/artist pages'
  220. _VALID_URL = r'https?://(?:www)?audius\.co/(?P<id>[^\/]+)/?(?:[?#]|$)'
  221. _TEST = {
  222. 'url': 'https://audius.co/pzl/',
  223. 'info_dict': {
  224. 'id': 'ezRo7',
  225. 'description': 'TAMALE\n\nContact: officialpzl@gmail.com',
  226. 'title': 'pzl',
  227. },
  228. 'playlist_count': 24,
  229. }
  230. def _real_extract(self, url):
  231. self._select_api_base()
  232. profile_id = self._match_id(url)
  233. try:
  234. _profile_data = self._api_request('/full/users/handle/' + profile_id, profile_id)
  235. except ExtractorError as e:
  236. raise ExtractorError('Could not download profile info; ' + str(e))
  237. profile_audius_id = _profile_data[0]['id']
  238. profile_bio = _profile_data[0].get('bio')
  239. api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
  240. return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)