# neteasemusic.py (19 KB)
  1. import itertools
  2. import json
  3. import re
  4. import time
  5. from base64 import b64encode
  6. from binascii import hexlify
  7. from datetime import datetime
  8. from hashlib import md5
  9. from random import randint
  10. from .common import InfoExtractor
  11. from ..aes import aes_ecb_encrypt, pkcs7_padding
  12. from ..compat import compat_urllib_parse_urlencode
  13. from ..utils import (
  14. ExtractorError,
  15. bytes_to_intlist,
  16. error_to_compat_str,
  17. float_or_none,
  18. int_or_none,
  19. intlist_to_bytes,
  20. sanitized_Request,
  21. try_get,
  22. )
  23. class NetEaseMusicBaseIE(InfoExtractor):
  24. _FORMATS = ['bMusic', 'mMusic', 'hMusic']
  25. _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
  26. _API_BASE = 'http://music.163.com/api/'
  27. @classmethod
  28. def _encrypt(cls, dfsid):
  29. salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
  30. string_bytes = bytearray(str(dfsid).encode('ascii'))
  31. salt_len = len(salt_bytes)
  32. for i in range(len(string_bytes)):
  33. string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
  34. m = md5()
  35. m.update(bytes(string_bytes))
  36. result = b64encode(m.digest()).decode('ascii')
  37. return result.replace('/', '_').replace('+', '-')
  38. def make_player_api_request_data_and_headers(self, song_id, bitrate):
  39. KEY = b'e82ckenh8dichen8'
  40. URL = '/api/song/enhance/player/url'
  41. now = int(time.time() * 1000)
  42. rand = randint(0, 1000)
  43. cookie = {
  44. 'osver': None,
  45. 'deviceId': None,
  46. 'appver': '8.0.0',
  47. 'versioncode': '140',
  48. 'mobilename': None,
  49. 'buildver': '1623435496',
  50. 'resolution': '1920x1080',
  51. '__csrf': '',
  52. 'os': 'pc',
  53. 'channel': None,
  54. 'requestId': '{0}_{1:04}'.format(now, rand),
  55. }
  56. request_text = json.dumps(
  57. {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
  58. separators=(',', ':'))
  59. message = 'nobody{0}use{1}md5forencrypt'.format(
  60. URL, request_text).encode('latin1')
  61. msg_digest = md5(message).hexdigest()
  62. data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
  63. URL, request_text, msg_digest)
  64. data = pkcs7_padding(bytes_to_intlist(data))
  65. encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
  66. encrypted_params = hexlify(encrypted).decode('ascii').upper()
  67. cookie = '; '.join(
  68. ['{0}={1}'.format(k, v if v is not None else 'undefined')
  69. for [k, v] in cookie.items()])
  70. headers = {
  71. 'User-Agent': self.extractor.get_param('http_headers')['User-Agent'],
  72. 'Content-Type': 'application/x-www-form-urlencoded',
  73. 'Referer': 'https://music.163.com',
  74. 'Cookie': cookie,
  75. }
  76. return ('params={0}'.format(encrypted_params), headers)
  77. def _call_player_api(self, song_id, bitrate):
  78. url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
  79. data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
  80. try:
  81. msg = 'empty result'
  82. result = self._download_json(
  83. url, song_id, data=data.encode('ascii'), headers=headers)
  84. if result:
  85. return result
  86. except ExtractorError as e:
  87. if type(e.cause) in (ValueError, TypeError):
  88. # JSON load failure
  89. raise
  90. except Exception as e:
  91. msg = error_to_compat_str(e)
  92. self.report_warning('%s API call (%s) failed: %s' % (
  93. song_id, bitrate, msg))
  94. return {}
  95. def extract_formats(self, info):
  96. err = 0
  97. formats = []
  98. song_id = info['id']
  99. for song_format in self._FORMATS:
  100. details = info.get(song_format)
  101. if not details:
  102. continue
  103. bitrate = int_or_none(details.get('bitrate')) or 999000
  104. data = self._call_player_api(song_id, bitrate)
  105. for song in try_get(data, lambda x: x['data'], list) or []:
  106. song_url = try_get(song, lambda x: x['url'])
  107. if not song_url:
  108. continue
  109. if self._is_valid_url(song_url, info['id'], 'song'):
  110. formats.append({
  111. 'url': song_url,
  112. 'ext': details.get('extension'),
  113. 'abr': float_or_none(song.get('br'), scale=1000),
  114. 'format_id': song_format,
  115. 'filesize': int_or_none(song.get('size')),
  116. 'asr': int_or_none(details.get('sr')),
  117. })
  118. elif err == 0:
  119. err = try_get(song, lambda x: x['code'], int)
  120. if not formats:
  121. msg = 'No media links found'
  122. if err != 0 and (err < 200 or err >= 400):
  123. raise ExtractorError(
  124. '%s (site code %d)' % (msg, err, ), expected=True)
  125. else:
  126. self.raise_geo_restricted(
  127. msg + ': probably this video is not available from your location due to geo restriction.',
  128. countries=['CN'])
  129. return formats
  130. @classmethod
  131. def convert_milliseconds(cls, ms):
  132. return int(round(ms / 1000.0))
  133. def query_api(self, endpoint, video_id, note):
  134. req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
  135. req.add_header('Referer', self._API_BASE)
  136. return self._download_json(req, video_id, note)
  137. class NetEaseMusicIE(NetEaseMusicBaseIE):
  138. IE_NAME = 'netease:song'
  139. IE_DESC = '网易云音乐'
  140. _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
  141. _TESTS = [{
  142. 'url': 'http://music.163.com/#/song?id=32102397',
  143. 'md5': '3e909614ce09b1ccef4a3eb205441190',
  144. 'info_dict': {
  145. 'id': '32102397',
  146. 'ext': 'mp3',
  147. 'title': 'Bad Blood',
  148. 'creator': 'Taylor Swift / Kendrick Lamar',
  149. 'upload_date': '20150516',
  150. 'timestamp': 1431792000,
  151. 'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
  152. },
  153. }, {
  154. 'note': 'No lyrics.',
  155. 'url': 'http://music.163.com/song?id=17241424',
  156. 'info_dict': {
  157. 'id': '17241424',
  158. 'ext': 'mp3',
  159. 'title': 'Opus 28',
  160. 'creator': 'Dustin O\'Halloran',
  161. 'upload_date': '20080211',
  162. 'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
  163. 'timestamp': 1202745600,
  164. },
  165. }, {
  166. 'note': 'Has translated name.',
  167. 'url': 'http://music.163.com/#/song?id=22735043',
  168. 'info_dict': {
  169. 'id': '22735043',
  170. 'ext': 'mp3',
  171. 'title': '소원을 말해봐 (Genie)',
  172. 'creator': '少女时代',
  173. 'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
  174. 'upload_date': '20100127',
  175. 'timestamp': 1264608000,
  176. 'alt_title': '说出愿望吧(Genie)',
  177. },
  178. }, {
  179. 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
  180. 'md5': '95826c73ea50b1c288b22180ec9e754d',
  181. 'info_dict': {
  182. 'id': '95670',
  183. 'ext': 'mp3',
  184. 'title': '国际歌',
  185. 'creator': '马备',
  186. 'upload_date': '19911130',
  187. 'timestamp': 691516800,
  188. 'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
  189. },
  190. }]
  191. def _process_lyrics(self, lyrics_info):
  192. original = lyrics_info.get('lrc', {}).get('lyric')
  193. translated = lyrics_info.get('tlyric', {}).get('lyric')
  194. if not translated:
  195. return original
  196. lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
  197. original_ts_texts = re.findall(lyrics_expr, original)
  198. translation_ts_dict = dict(
  199. (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
  200. )
  201. lyrics = '\n'.join([
  202. '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
  203. for time_stamp, text in original_ts_texts
  204. ])
  205. return lyrics
  206. def _real_extract(self, url):
  207. song_id = self._match_id(url)
  208. params = {
  209. 'id': song_id,
  210. 'ids': '[%s]' % song_id
  211. }
  212. info = self.query_api(
  213. 'song/detail?' + compat_urllib_parse_urlencode(params),
  214. song_id, 'Downloading song info')['songs'][0]
  215. formats = self.extract_formats(info)
  216. lyrics_info = self.query_api(
  217. 'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
  218. song_id, 'Downloading lyrics data')
  219. lyrics = self._process_lyrics(lyrics_info)
  220. alt_title = None
  221. if info.get('transNames'):
  222. alt_title = '/'.join(info.get('transNames'))
  223. return {
  224. 'id': song_id,
  225. 'title': info['name'],
  226. 'alt_title': alt_title,
  227. 'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
  228. 'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
  229. 'thumbnail': info.get('album', {}).get('picUrl'),
  230. 'duration': self.convert_milliseconds(info.get('duration', 0)),
  231. 'description': lyrics,
  232. 'formats': formats,
  233. }
  234. class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
  235. IE_NAME = 'netease:album'
  236. IE_DESC = '网易云音乐 - 专辑'
  237. _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
  238. _TEST = {
  239. 'url': 'http://music.163.com/#/album?id=220780',
  240. 'info_dict': {
  241. 'id': '220780',
  242. 'title': 'B\'day',
  243. },
  244. 'playlist_count': 23,
  245. 'skip': 'Blocked outside Mainland China',
  246. }
  247. def _real_extract(self, url):
  248. album_id = self._match_id(url)
  249. info = self.query_api(
  250. 'album/%s?id=%s' % (album_id, album_id),
  251. album_id, 'Downloading album data')['album']
  252. name = info['name']
  253. desc = info.get('description')
  254. entries = [
  255. self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  256. 'NetEaseMusic', song['id'])
  257. for song in info['songs']
  258. ]
  259. return self.playlist_result(entries, album_id, name, desc)
  260. class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
  261. IE_NAME = 'netease:singer'
  262. IE_DESC = '网易云音乐 - 歌手'
  263. _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
  264. _TESTS = [{
  265. 'note': 'Singer has aliases.',
  266. 'url': 'http://music.163.com/#/artist?id=10559',
  267. 'info_dict': {
  268. 'id': '10559',
  269. 'title': '张惠妹 - aMEI;阿密特',
  270. },
  271. 'playlist_count': 50,
  272. 'skip': 'Blocked outside Mainland China',
  273. }, {
  274. 'note': 'Singer has translated name.',
  275. 'url': 'http://music.163.com/#/artist?id=124098',
  276. 'info_dict': {
  277. 'id': '124098',
  278. 'title': '李昇基 - 이승기',
  279. },
  280. 'playlist_count': 50,
  281. 'skip': 'Blocked outside Mainland China',
  282. }]
  283. def _real_extract(self, url):
  284. singer_id = self._match_id(url)
  285. info = self.query_api(
  286. 'artist/%s?id=%s' % (singer_id, singer_id),
  287. singer_id, 'Downloading singer data')
  288. name = info['artist']['name']
  289. if info['artist']['trans']:
  290. name = '%s - %s' % (name, info['artist']['trans'])
  291. if info['artist']['alias']:
  292. name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
  293. entries = [
  294. self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  295. 'NetEaseMusic', song['id'])
  296. for song in info['hotSongs']
  297. ]
  298. return self.playlist_result(entries, singer_id, name)
  299. class NetEaseMusicListIE(NetEaseMusicBaseIE):
  300. IE_NAME = 'netease:playlist'
  301. IE_DESC = '网易云音乐 - 歌单'
  302. _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
  303. _TESTS = [{
  304. 'url': 'http://music.163.com/#/playlist?id=79177352',
  305. 'info_dict': {
  306. 'id': '79177352',
  307. 'title': 'Billboard 2007 Top 100',
  308. 'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
  309. },
  310. 'playlist_count': 99,
  311. 'skip': 'Blocked outside Mainland China',
  312. }, {
  313. 'note': 'Toplist/Charts sample',
  314. 'url': 'http://music.163.com/#/discover/toplist?id=3733003',
  315. 'info_dict': {
  316. 'id': '3733003',
  317. 'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
  318. 'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
  319. },
  320. 'playlist_count': 50,
  321. 'skip': 'Blocked outside Mainland China',
  322. }]
  323. def _real_extract(self, url):
  324. list_id = self._match_id(url)
  325. info = self.query_api(
  326. 'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
  327. list_id, 'Downloading playlist data')['result']
  328. name = info['name']
  329. desc = info.get('description')
  330. if info.get('specialType') == 10: # is a chart/toplist
  331. datestamp = datetime.fromtimestamp(
  332. self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
  333. name = '%s %s' % (name, datestamp)
  334. entries = [
  335. self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  336. 'NetEaseMusic', song['id'])
  337. for song in info['tracks']
  338. ]
  339. return self.playlist_result(entries, list_id, name, desc)
  340. class NetEaseMusicMvIE(NetEaseMusicBaseIE):
  341. IE_NAME = 'netease:mv'
  342. IE_DESC = '网易云音乐 - MV'
  343. _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
  344. _TEST = {
  345. 'url': 'http://music.163.com/#/mv?id=415350',
  346. 'info_dict': {
  347. 'id': '415350',
  348. 'ext': 'mp4',
  349. 'title': '이럴거면 그러지말지',
  350. 'description': '白雅言自作曲唱甜蜜爱情',
  351. 'creator': '白雅言',
  352. 'upload_date': '20150520',
  353. },
  354. 'skip': 'Blocked outside Mainland China',
  355. }
  356. def _real_extract(self, url):
  357. mv_id = self._match_id(url)
  358. info = self.query_api(
  359. 'mv/detail?id=%s&type=mp4' % mv_id,
  360. mv_id, 'Downloading mv info')['data']
  361. formats = [
  362. {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
  363. for brs, mv_url in info['brs'].items()
  364. ]
  365. return {
  366. 'id': mv_id,
  367. 'title': info['name'],
  368. 'description': info.get('desc') or info.get('briefDesc'),
  369. 'creator': info['artistName'],
  370. 'upload_date': info['publishTime'].replace('-', ''),
  371. 'formats': formats,
  372. 'thumbnail': info.get('cover'),
  373. 'duration': self.convert_milliseconds(info.get('duration', 0)),
  374. }
  375. class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
  376. IE_NAME = 'netease:program'
  377. IE_DESC = '网易云音乐 - 电台节目'
  378. _VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
  379. _TESTS = [{
  380. 'url': 'http://music.163.com/#/program?id=10109055',
  381. 'info_dict': {
  382. 'id': '10109055',
  383. 'ext': 'mp3',
  384. 'title': '不丹足球背后的故事',
  385. 'description': '喜马拉雅人的足球梦 ...',
  386. 'creator': '大话西藏',
  387. 'timestamp': 1434179342,
  388. 'upload_date': '20150613',
  389. 'duration': 900,
  390. },
  391. 'skip': 'Blocked outside Mainland China',
  392. }, {
  393. 'note': 'This program has accompanying songs.',
  394. 'url': 'http://music.163.com/#/program?id=10141022',
  395. 'info_dict': {
  396. 'id': '10141022',
  397. 'title': '25岁,你是自在如风的少年<27°C>',
  398. 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
  399. },
  400. 'playlist_count': 4,
  401. 'skip': 'Blocked outside Mainland China',
  402. }, {
  403. 'note': 'This program has accompanying songs.',
  404. 'url': 'http://music.163.com/#/program?id=10141022',
  405. 'info_dict': {
  406. 'id': '10141022',
  407. 'ext': 'mp3',
  408. 'title': '25岁,你是自在如风的少年<27°C>',
  409. 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
  410. 'timestamp': 1434450841,
  411. 'upload_date': '20150616',
  412. },
  413. 'params': {
  414. 'noplaylist': True
  415. },
  416. 'skip': 'Blocked outside Mainland China',
  417. }]
  418. def _real_extract(self, url):
  419. program_id = self._match_id(url)
  420. info = self.query_api(
  421. 'dj/program/detail?id=%s' % program_id,
  422. program_id, 'Downloading program info')['program']
  423. name = info['name']
  424. description = info['description']
  425. if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
  426. formats = self.extract_formats(info['mainSong'])
  427. return {
  428. 'id': info['mainSong']['id'],
  429. 'title': name,
  430. 'description': description,
  431. 'creator': info['dj']['brand'],
  432. 'timestamp': self.convert_milliseconds(info['createTime']),
  433. 'thumbnail': info['coverUrl'],
  434. 'duration': self.convert_milliseconds(info.get('duration', 0)),
  435. 'formats': formats,
  436. }
  437. song_ids = [info['mainSong']['id']]
  438. song_ids.extend([song['id'] for song in info['songs']])
  439. entries = [
  440. self.url_result('http://music.163.com/#/song?id=%s' % song_id,
  441. 'NetEaseMusic', song_id)
  442. for song_id in song_ids
  443. ]
  444. return self.playlist_result(entries, program_id, name, description)
  445. class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
  446. IE_NAME = 'netease:djradio'
  447. IE_DESC = '网易云音乐 - 电台'
  448. _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
  449. _TEST = {
  450. 'url': 'http://music.163.com/#/djradio?id=42',
  451. 'info_dict': {
  452. 'id': '42',
  453. 'title': '声音蔓延',
  454. 'description': 'md5:766220985cbd16fdd552f64c578a6b15'
  455. },
  456. 'playlist_mincount': 40,
  457. 'skip': 'Blocked outside Mainland China',
  458. }
  459. _PAGE_SIZE = 1000
  460. def _real_extract(self, url):
  461. dj_id = self._match_id(url)
  462. name = None
  463. desc = None
  464. entries = []
  465. for offset in itertools.count(start=0, step=self._PAGE_SIZE):
  466. info = self.query_api(
  467. 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
  468. % (self._PAGE_SIZE, dj_id, offset),
  469. dj_id, 'Downloading dj programs - %d' % offset)
  470. entries.extend([
  471. self.url_result(
  472. 'http://music.163.com/#/program?id=%s' % program['id'],
  473. 'NetEaseMusicProgram', program['id'])
  474. for program in info['programs']
  475. ])
  476. if name is None:
  477. radio = info['programs'][0]['radio']
  478. name = radio['name']
  479. desc = radio['desc']
  480. if not info['more']:
  481. break
  482. return self.playlist_result(entries, dj_id, name, desc)