py_bilibilimd.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. #coding=utf-8
  2. #!/usr/bin/python
  3. import sys
  4. import json
  5. import time
  6. from datetime import datetime
  7. from difflib import SequenceMatcher
  8. from urllib.parse import quote, unquote
  9. sys.path.append('..')
  10. from base.spider import Spider
  11. class Spider(Spider): # 元类 默认的元类 type
  def getName(self):
      """Return the display name of this spider."""
      name = "B站番剧"
      return name
  def init(self, extend):
      """Parse the JSON ``extend`` string into ``self.extendDict``.

      Args:
          extend: JSON text expected to encode an object. Anything that
              cannot be parsed, or that parses to a non-object value,
              results in an empty dict.
      """
      try:
          parsed = json.loads(extend)
      except (TypeError, ValueError, RecursionError):
          # TypeError: input is not str/bytes (e.g. None).
          # ValueError: malformed JSON (JSONDecodeError is a subclass).
          # RecursionError: pathologically nested input.
          parsed = {}
      # Other methods test `'cookie' in self.extendDict` and then index it by
      # key, so only a mapping is usable here.
      self.extendDict = parsed if isinstance(parsed, dict) else {}
  def isVideoFormat(self, url):
      """Placeholder hook: performs no check on ``url`` and returns ``None``."""
      return None
  def manualVideoCheck(self):
      """Placeholder hook: does nothing and returns ``None``."""
      return None
  def homeContent(self, filter):
      """Build the home payload: the category list and, optionally, filters.

      Args:
          filter: When truthy, ``self.config['filter']`` is attached under
              ``'filters'`` after its ``year`` / ``release_date`` options
              have been topped up to the current year.

      Returns:
          dict with ``'class'`` (list of ``{'type_name', 'type_id'}``) and,
          when ``filter`` is truthy, ``'filters'``.

      Note:
          ``result['filters']`` is the same object as ``self.config['filter']``;
          the year options are added to it in place. Repeated calls are
          idempotent because the first numeric option then equals the
          current year and nothing more is inserted.
      """
      cateManual = {
          "番剧": "1",
          "国创": "4",
          "电影": "2",
          "综艺": "7",
          "电视剧": "5",
      }
      result = {
          'class': [
              {'type_name': name, 'type_id': typeId}
              for name, typeId in cateManual.items()
          ]
      }
      if not filter:
          return result

      # Value template per year-like filter key; each is formatted with
      # (year, year + 1).
      rangeFormats = {
          'year': '[{},{})',
          'release_date': '[{}-01-01 00:00:00,{}-01-01 00:00:00)',
      }
      result['filters'] = self.config['filter']
      current_year = datetime.now().year
      for filterGroup in result['filters'].values():
          for rf in filterGroup:
              fmt = rangeFormats.get(rf['key'])
              if fmt is None:
                  continue
              for pos, rfv in enumerate(rf['value']):
                  if not rfv['n'].isdigit():
                      continue
                  # Only the first numeric option matters: add every year
                  # from the current one down to (but excluding) it.
                  newestListed = int(rfv['n'])
                  missingYears = [
                      {'v': fmt.format(year, year + 1), 'n': str(year)}
                      for year in range(current_year, newestListed, -1)
                  ]
                  # Splice the options in as siblings. list.insert() would
                  # nest the whole list as a single element and break any
                  # later rfv['n'] lookup.
                  rf['value'][pos:pos] = missingYears
                  break
      return result
  def homeVideoContent(self):
      """Return page '1' of category '1' with no extra filters."""
      firstPage = self.categoryContent('1', '1', False, {})
      return firstPage
  def categoryContent(self, cid, page, filter, ext):
      """Fetch one page of the season index for a category.

      Args:
          cid: Category id; sent as both ``st`` and ``season_type``.
          page: Page number; converted with ``int``.
          filter: Not used by this method.
          ext: Mapping of extra query parameters; each value is URL-quoted
              and appended to the request URL.

      Returns:
          dict with ``'list'`` (entries holding vod_id / vod_name / vod_pic /
          vod_remarks), ``'page'``, ``'pagecount'`` (``page + 1`` when the
          response reports ``has_next == 1``, otherwise ``page``), and
          ``'limit'`` / ``'total'`` (both the number of entries returned).
      """
      page = int(page)
      cookie, _, _ = self.getCookie('{}')
      baseUrl = 'https://api.bilibili.com/pgc/season/index/result?order=2&sort=0&pagesize=20&type=1&st={}&season_type={}&page={}'.format(cid, cid, page)
      extraQuery = ''.join(
          '&{}={}'.format(name, quote(ext[name])) for name in ext
      )
      response = self.fetch(baseUrl + extraQuery, headers=self.header, cookies=cookie, timeout=5)
      payload = json.loads(self.cleanText(response.text))['data']

      videos = [
          {
              "vod_id": str(vod['season_id']).strip(),
              "vod_name": self.removeHtmlTags(self.cleanText(vod['title'])),
              "vod_pic": vod['cover'].strip(),
              "vod_remarks": vod['index_show'].strip(),
          }
          for vod in payload['list']
      ]
      count = len(videos)
      pagecount = page + 1 if payload['has_next'] == 1 else page
      return {
          'list': videos,
          'page': page,
          'pagecount': pagecount,
          'limit': count,
          'total': count,
      }
  def detailContent(self, did):
      """Fetch season details and build the episode play list.

      Args:
          did: Sequence whose first element is the season id.

      Returns:
          ``{'list': [vod]}`` where ``vod`` carries the season metadata plus
          ``vod_play_from`` and ``vod_play_url``. Each episode is encoded as
          ``[duration]/name$epid_cid`` and episodes are separated by ``#``.
      """
      did = did[0]
      url = "http://api.bilibili.com/pgc/view/web/season?season_id={0}".format(did)
      response = self.fetch(url, headers=self.header, timeout=10)
      season = json.loads(self.cleanText(response.text))['result']
      vod = {
          "vod_id": did,
          "vod_name": self.removeHtmlTags(season['title']),
          "vod_pic": season['cover'],
          "type_name": season['share_sub_title'],
          "vod_actor": season['actors'].replace('\n', ','),
          "vod_content": self.removeHtmlTags(season['evaluate'])
      }

      entries = []
      for episode in season['episodes']:
          eid = episode['id']
          cid = episode['cid']
          # '#' and '$' are separators in the play-url format, so they are
          # replaced inside the episode name.
          name = self.removeHtmlTags(episode['share_copy']).replace("#", "-").replace('$', '*')
          # The duration is divided by 1000 before being formatted as a clock
          # time; a leading zero-hour field is dropped.
          remark = time.strftime('%H:%M:%S', time.gmtime(episode['duration']/1000))
          if remark.startswith('00:'):
              remark = remark[3:]
          entries.append('[{}]/{}${}_{}#'.format(remark, name, eid, cid))

      vod['vod_play_from'] = 'B站番剧'
      vod['vod_play_url'] = ''.join(entries).strip('#')
      return {'list': [vod]}
  def searchContent(self, key, quick):
      """Search for ``key``; delegates to ``searchContentPage`` with page '1'."""
      firstPage = '1'
      return self.searchContentPage(key, quick, firstPage)
  def searchContentPage(self, key, quick, page):
      """Search bangumi media for ``key`` and return the matching entries.

      The cookie comes from ``self.extendDict['cookie']``; if
      ``self.extendDict['json']`` is set, that URL is fetched and its
      ``cookie`` field (when present) takes precedence. A string cookie
      starting with ``http`` is itself fetched and the response text used.

      Args:
          key: Search keyword. It is URL-quoted before being placed in the
              query string.
          quick: Not used by this method.
          page: Result page number.

      Returns:
          ``{'list': [...]}`` with one dict per kept result (vod_id,
          vod_name, vod_pic, vod_remarks). The return value is always a
          dict, including when the response carries no results.
      """
      cookie = ''
      if 'cookie' in self.extendDict:
          cookie = self.extendDict['cookie']
      if 'json' in self.extendDict:
          remoteConf = self.fetch(self.extendDict['json'], timeout=10).json()
          if 'cookie' in remoteConf:
              cookie = remoteConf['cookie']
      if cookie == '':
          cookie = '{}'
      elif isinstance(cookie, str) and cookie.startswith('http'):
          cookie = self.fetch(cookie, timeout=10).text.strip()
      if isinstance(cookie, dict):
          try:
              cookie = json.dumps(cookie, ensure_ascii=False)
          except (TypeError, ValueError):
              # Best effort: a mapping that cannot be serialised is handed to
              # getCookie unchanged.
              pass
      cookie, _, _ = self.getCookie(cookie)

      # Quote the keyword so characters such as '&', '#' or spaces cannot
      # corrupt the query string.
      url = f'https://api.bilibili.com/x/web-interface/search/type?search_type=media_bangumi&keyword={quote(str(key))}&page={page}'
      r = self.fetch(url, headers=self.header, cookies=cookie, timeout=5)
      data = json.loads(self.cleanText(r.text))

      videos = []
      # A missing or empty 'data' / 'result' means no hits.
      vodList = (data.get('data') or {}).get('result') or []
      for vod in vodList:
          sid = str(vod['season_id']).strip()
          title = self.removeHtmlTags(self.cleanText(vod['title']))
          # Keep a hit when the title is similar enough to the keyword or
          # contains it verbatim.
          if SequenceMatcher(None, title, key).ratio() < 0.6 and key not in title:
              continue
          # Prefer the first episode's cover; fall back to the entry's own
          # 'cover' field (if any) when there are no episodes.
          eps = vod.get('eps') or []
          cover = eps[0].get('cover') if eps else vod.get('cover')
          img = (cover or '').strip()
          remark = self.removeHtmlTags(vod['index_show']).strip()
          videos.append({
              "vod_id": sid,
              "vod_name": title,
              "vod_pic": img,
              "vod_remarks": remark
          })
      return {'list': videos}
  def playerContent(self, flag, pid, vipFlags):
      """Build the player payload for one episode.

      Args:
          flag: Not used by this method.
          pid: ``"<ep_id>_<cid>"`` as produced in the play url by
              ``detailContent``.
          vipFlags: Not used by this method.

      Returns:
          dict with ``parse``, ``playUrl``, ``url`` (a local proxy URL
          carrying the quoted cookies, the quoted playurl API address, the
          ids and the thread count), ``header``, ``danmaku`` and ``format``.
      """
      parts = pid.split("_")
      aid, cid = parts[0], parts[1]
      url = 'https://api.bilibili.com/pgc/player/web/playurl?ep_id={0}&cid={1}&qn=120&fnval=4048&fnver=0&fourk=1'.format(aid, cid)

      # Resolve the cookie: local config first, then the remote JSON config.
      extendDict = self.extendDict
      cookie = extendDict['cookie'] if 'cookie' in extendDict else ''
      if 'json' in extendDict:
          remoteConf = self.fetch(extendDict['json'], timeout=10).json()
          if 'cookie' in remoteConf:
              cookie = remoteConf['cookie']
      if cookie == '':
          cookie = '{}'
      elif type(cookie) is str and cookie.startswith('http'):
          # The cookie value is a URL: use the text it serves.
          cookie = self.fetch(cookie, timeout=10).text.strip()
      if type(cookie) is dict:
          try:
              cookie = json.dumps(cookie, ensure_ascii=False)
          except:
              # Left as-is on any serialisation failure.
              pass
      cookiesDict, _, _ = self.getCookie(cookie)
      cookies = quote(json.dumps(cookiesDict))

      thread = str(extendDict['thread']) if 'thread' in extendDict else '0'
      return {
          "parse": '0',
          "playUrl": '',
          "url": f'http://127.0.0.1:UndCover/proxy?do=py&type=mpd&cookies={cookies}&url={quote(url)}&aid={aid}&cid={cid}&thread={thread}',
          "header": self.header,
          'danmaku': 'https://api.bilibili.com/x/v1/dm/list.so?oid={}'.format(cid),
          "format": 'application/dash+xml',
      }
  def localProxy(self, params):
      """Dispatch a local-proxy request on ``params['type']``.

      ``"mpd"`` goes to ``proxyMpd`` and ``"media"`` to ``proxyMedia``; any
      other type yields ``None``. The request is printed before dispatch.
      """
      kind = params['type']
      if kind == "mpd":
          tag, handlerName = 'localProxympd', 'proxyMpd'
      elif kind == "media":
          tag, handlerName = 'localProxymedia', 'proxyMedia'
      else:
          return None
      print(tag, params)
      return getattr(self, handlerName)(params)
  def proxyMpd(self, params):
      """Answer an ``mpd`` proxy request using the result of ``getDash``.

      When ``getDash`` reports type ``'mpd'`` the content is returned inline
      as ``application/dash+xml``; otherwise the content is treated as a URL
      to stream.

      Returns:
          ``[status, content_type, action, body]``.
      """
      content, _, mediaType = self.getDash(params)
      isManifest = mediaType == 'mpd'
      action = {
          'url': '' if isManifest else content,
          'header': self.header,
          'param': '',
          'type': 'string' if isManifest else 'stream',
      }
      print('proxyMpd', action)
      if isManifest:
          return [200, "application/dash+xml", action, content]
      return [200, "video/MP2T", action, '']
  def proxyMedia(self, params, forceRefresh=False):
      """Resolve one DASH video or audio track to a stream-proxy action.

      Args:
          params: Request parameters. Besides what ``getDash`` reads, it
              must carry ``videoid`` or ``audioid``: the index of the track
              inside the ``video`` / ``audio`` list of the DASH info.
              ``videoid`` wins when both are present.
          forceRefresh: Not used; kept for interface compatibility.

      Returns:
          ``[200, "video/MP2T", action, '']`` where ``action['url']`` is the
          track's ``baseUrl``, or ``[404, "text/plain", {}, ""]`` when no
          track id is given or the requested track cannot be resolved.
      """
      _, dashinfos, _ = self.getDash(params)
      if 'videoid' in params:
          trackKind, trackId = 'video', params['videoid']
      elif 'audioid' in params:
          trackKind, trackId = 'audio', params['audioid']
      else:
          return [404, "text/plain", {}, ""]
      try:
          playurl = dashinfos[trackKind][int(trackId)]['baseUrl']
      except (KeyError, IndexError, TypeError, ValueError):
          # getDash yields an empty mapping when no DASH data is available,
          # and the id may be non-numeric or out of range; report "not found"
          # instead of raising.
          return [404, "text/plain", {}, ""]
      action = {'url': playurl, 'header': self.header, 'param': '', 'type': 'stream'}
      print('proxyMedia', action)
      return [200, "video/MP2T", action, '']
  257. def getDash(self, params, forceRefresh=False):
  258. aid = params['aid']
  259. cid = params['cid']
  260. url = unquote(params['url'])
  261. if 'thread' in params:
  262. thread = params['thread']
  263. else:
  264. thread = 0
  265. header = self.header.copy()
  266. cookieDict = json.loads(unquote(params['cookies']))
  267. key = 'bilimdmpdCache_{}_{}'.format(aid, cid)
  268. if not forceRefresh:
  269. data = self.getCache(key)
  270. print('getDash', data)
  271. if data:
  272. return data['content'], data['dashinfos'], data['type']
  273. cookies = cookieDict
  274. r = self.fetch(url, cookies=cookies, headers=header, timeout=5)
  275. data = json.loads(self.cleanText(r.text))
  276. if data['code'] != 0:
  277. return '', {}, ''
  278. if not 'dash' in data['result']:
  279. purl = data['result']['durl'][0]['url']
  280. try:
  281. expiresAt = int(self.regStr(reg='deadline=(\d+)', src=purl).group(1)) - 60
  282. except:
  283. expiresAt = int(time.time()) + 600
  284. if int(thread) > 0:
  285. try:
  286. self.fetch('http://127.0.0.1:7777')
  287. except:
  288. self.fetch('http://127.0.0.1:9978/go')
  289. purl = f'http://127.0.0.1:7777?url={quote(purl)}&thread={thread}'
  290. self.setCache(key, {'content': purl, 'type': 'mp4', 'dashinfos': {}, 'expiresAt': expiresAt})
  291. return purl, {}, 'mp4'
  292. dashinfos = data['result']['dash']
  293. duration = dashinfos['duration']
  294. minBufferTime = dashinfos['minBufferTime']
  295. videoinfo = ''
  296. videoid = 0
  297. deadlineList = []
  298. # videoList = sorted(dashinfos['video'], key=lambda x: x['bandwidth'], reverse=True)
  299. for video in dashinfos['video']:
  300. try:
  301. deadline = int(self.regStr(reg='deadline=(\d+)', src=video['baseUrl']).group(1))
  302. except:
  303. deadline = int(time.time()) + 600
  304. deadlineList.append(deadline)
  305. codecs = vid