#!/usr/bin/python
# coding=utf-8
# py_bilibili.py
import base64
import json
import os
import sys
import time
from urllib.parse import quote

from requests import session, utils

# The parent directory must be on sys.path before the project import below.
sys.path.append('..')
from base.spider import Spider
  11. class Spider(Spider): # 元类 默认的元类 type
  12. def getName(self):
  13. return "B站视频"
  14. def init(self,extend=""):
  15. print("============{0}============".format(extend))
  16. pass
  17. def isVideoFormat(self,url):
  18. pass
  19. def manualVideoCheck(self):
  20. pass
  21. def homeContent(self,filter):
  22. result = {}
  23. cateManual = {
  24. "演唱会":"演唱会4K",
  25. "MV":"MV4K",
  26. "窗白噪音":"窗白噪音4K",
  27. "风景":"风景4K",
  28. "说案":"说案",
  29. "戏曲":"戏曲4K",
  30. "演讲":"演讲4K",
  31. "解说":"解说",
  32. "相声小品":"相声小品",
  33. "河卫国风":"河南卫视国风4K",
  34. "儿童":"儿童",
  35. "苏教版":"苏教版课程",
  36. "人教版":"人教版课程",
  37. "沪教版":"沪教版课程",
  38. "北师大版":"北师大版课程",
  39. "球星":"球星",
  40. "动物世界":"动物世界4K"
  41. }
  42. classes = []
  43. for k in cateManual:
  44. classes.append({
  45. 'type_name':k,
  46. 'type_id':cateManual[k]
  47. })
  48. result['class'] = classes
  49. if(filter):
  50. result['filters'] = self.config['filter']
  51. return result
  52. def homeVideoContent(self):
  53. result = {
  54. 'list':[]
  55. }
  56. return result
  57. cookies = ''
  58. def getCookie(self):
  59. cookies_str ="buvid3=418CAE55-A89A-0098-4B2B-C7F6E828462038635infoc; rpdid=|(u)~kmY)kml0J'uYkukRYRRJ; video_page_version=v_old_home_6; buvid_fp=418CAE55-A89A-0098-4B2B-C7F6E828462038635infoc; buvid_fp_plain=6463AA03-B557-A6CF-6E13-6309086EB29041849infoc; i-wanna-go-back=-1; CURRENT_BLACKGAP=0; CURRENT_QUALITY=80; blackside_state=0; nostalgia_conf=-1; fingerprint=63b8c1cbf6ab858bf9a04a9ff112f9bb; SESSDATA=2472ade8,1677739051,a03fc*91; bili_jct=b0d218df4c5be5b7f26d3b0ae390e826; DedeUserID=667298592; DedeUserID__ckMd5=aa18ade6353974c9; sid=5o3z9v5c; bp_video_offset_667298592=undefined; b_ut=5; CURRENT_FNVAL=16; innersign=0" #填入大会员Cookies
  60. cookies_dic = dict([co.strip().split('=') for co in cookies_str.split(';')])
  61. rsp = session()
  62. cookies_jar = utils.cookiejar_from_dict(cookies_dic)
  63. rsp.cookies = cookies_jar
  64. self.cookies = rsp.cookies
  65. return rsp.cookies
  66. def categoryContent(self,tid,pg,filter,extend):
  67. result = {}
  68. url = 'https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword={0}&page={1}'.format(tid,pg)
  69. if len(self.cookies) <= 0:
  70. self.getCookie()
  71. rsp = self.fetch(url,cookies=self.cookies)
  72. content = rsp.text
  73. jo = json.loads(content)
  74. if jo['code'] != 0:
  75. rspRetry = self.fetch(url,cookies=self.getCookie())
  76. content = rspRetry.text
  77. jo = json.loads(content)
  78. videos = []
  79. vodList = jo['data']['result']
  80. for vod in vodList:
  81. aid = str(vod['aid']).strip()
  82. title = vod['title'].strip().replace("<em class=\"keyword\">","").replace("</em>","")
  83. img = 'https:' + vod['pic'].strip()
  84. remark = str(vod['duration']).strip()
  85. videos.append({
  86. "vod_id":aid,
  87. "vod_name":title,
  88. "vod_pic":img,
  89. "vod_remarks":remark
  90. })
  91. result['list'] = videos
  92. result['page'] = pg
  93. result['pagecount'] = 9999
  94. result['limit'] = 90
  95. result['total'] = 999999
  96. return result
  97. def cleanSpace(self,str):
  98. return str.replace('\n','').replace('\t','').replace('\r','').replace(' ','')
  99. def detailContent(self,array):
  100. aid = array[0]
  101. url = "https://api.bilibili.com/x/web-interface/view?aid={0}".format(aid)
  102. rsp = self.fetch(url,headers=self.header)
  103. jRoot = json.loads(rsp.text)
  104. jo = jRoot['data']
  105. title = jo['title'].replace("<em class=\"keyword\">","").replace("</em>","")
  106. pic = jo['pic']
  107. desc = jo['desc']
  108. timeStamp = jo['pubdate']
  109. timeArray = time.localtime(timeStamp)
  110. year = str(time.strftime("%Y-%m-%d %H:%M", timeArray)).replace(" ","/")
  111. dire = jo['owner']['name']
  112. typeName = jo['tname']
  113. remark = str(jo['duration']).strip()
  114. vod = {
  115. "vod_id":aid,
  116. "vod_name":title,
  117. "vod_pic":pic,
  118. "type_name":typeName,
  119. "vod_year":year,
  120. "vod_area":"",
  121. "vod_remarks":remark,
  122. "vod_actor":"",
  123. "vod_director":dire,
  124. "vod_content":desc
  125. }
  126. ja = jo['pages']
  127. playUrl = ''
  128. for tmpJo in ja:
  129. cid = tmpJo['cid']
  130. part = tmpJo['part']
  131. playUrl = playUrl + '{0}${1}_{2}#'.format(part,aid,cid)
  132. vod['vod_play_from'] = 'B站视频'
  133. vod['vod_play_url'] = playUrl
  134. result = {
  135. 'list':[
  136. vod
  137. ]
  138. }
  139. return result
  140. def searchContent(self,key,quick):
  141. url = 'https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword={0}'.format(key)
  142. if len(self.cookies) <= 0:
  143. self.getCookie()
  144. rsp = self.fetch(url,cookies=self.cookies)
  145. content = rsp.text
  146. jo = json.loads(content)
  147. if jo['code'] != 0:
  148. rspRetry = self.fetch(url, cookies=self.getCookie())
  149. content = rspRetry.text
  150. jo = json.loads(content)
  151. videos = []
  152. vodList = jo['data']['result']
  153. for vod in vodList:
  154. aid = str(vod['aid']).strip()
  155. title = vod['title'].strip().replace("<em class=\"keyword\">", "").replace("</em>", "")
  156. img = 'https:' + vod['pic'].strip()
  157. remark = str(vod['duration']).strip()
  158. videos.append({
  159. "vod_id": aid,
  160. "vod_name": title,
  161. "vod_pic": img,
  162. "vod_remarks": remark
  163. })
  164. result = {
  165. 'list': videos
  166. }
  167. return result
  168. def playerContent(self,flag,id,vipFlags):
  169. result = {}
  170. ids = id.split("_")
  171. url = 'https://api.bilibili.com:443/x/player/playurl?avid={0}&cid={1}&qn=116'.format(ids[0],ids[1])
  172. if len(self.cookies) <= 0:
  173. self.getCookie()
  174. rsp = self.fetch(url,cookies=self.cookies)
  175. jRoot = json.loads(rsp.text)
  176. jo = jRoot['data']
  177. ja = jo['durl']
  178. maxSize = -1
  179. position = -1
  180. for i in range(len(ja)):
  181. tmpJo = ja[i]
  182. if maxSize < int(tmpJo['size']):
  183. maxSize = int(tmpJo['size'])
  184. position = i
  185. url = ''
  186. if len(ja) > 0:
  187. if position == -1:
  188. position = 0
  189. url = ja[position]['url']
  190. result["parse"] = 0
  191. result["playUrl"] = ''
  192. result["url"] = url
  193. result["header"] = {
  194. "Referer":"https://www.bilibili.com",
  195. "User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
  196. }
  197. result["contentType"] = 'video/x-flv'
  198. return result
  199. config = {
  200. "player": {},
  201. "filter": {
  202. "相声小品": [
  203. {
  204. "key": "tid",
  205. "name": "分类",
  206. "value": [
  207. {
  208. "n": "全部",
  209. "v": "相声小品"
  210. },
  211. {
  212. "n": "单口相声",
  213. "v": "单口相声"
  214. },
  215. {
  216. "n": "群口相声",
  217. "v": "群口相声"
  218. },
  219. {
  220. "n": "德云社",
  221. "v": "德云社"
  222. },
  223. {
  224. "n": "青曲社",
  225. "v": "青曲社"
  226. },
  227. {
  228. "n": "郭德纲",
  229. "v": "郭德纲"
  230. },
  231. {
  232. "n": "岳云鹏",
  233. "v": "岳云鹏"
  234. },
  235. {
  236. "n": "曹云金",
  237. "v": "曹云金"
  238. },
  239. {
  240. "n": "评书",
  241. "v": "评书"
  242. },
  243. {
  244. "n": "小曲",
  245. "v": "小曲"
  246. },
  247. {
  248. "n": "二人转",
  249. "v": "二人转"
  250. },
  251. {
  252. "n": "春晚小品",
  253. "v": "春晚小品"
  254. },
  255. {
  256. "n": "赵本山",
  257. "v": "赵本山"
  258. },
  259. {
  260. "n": "陈佩斯",
  261. "v": "陈佩斯"
  262. },
  263. {
  264. "n": "冯巩",
  265. "v": "冯巩"
  266. },
  267. {
  268. "n": "宋小宝",
  269. "v": "宋小宝"
  270. },
  271. {
  272. "n": "赵丽蓉",
  273. "v": "赵丽蓉"
  274. },
  275. {
  276. "n": "郭达",
  277. "v": "郭达"
  278. },
  279. {
  280. "n": "潘长江",
  281. "v": "潘长江"
  282. },
  283. {
  284. "n": "郭冬临",
  285. "v": "郭冬临"
  286. },
  287. {
  288. "n": "严顺开",
  289. "v": "严顺开"
  290. },
  291. {
  292. "n": "文松",
  293. "v": "文松"
  294. },
  295. {
  296. "n": "开心麻花",
  297. "v": "开心麻花"
  298. },
  299. {
  300. "n": "屌丝男士",
  301. "v": "屌丝男士"
  302. },
  303. {
  304. "n": "喜剧综艺",
  305. "v": "喜剧综艺"
  306. }
  307. ]
  308. },
  309. {
  310. "key": "duration",
  311. "name": "时长",
  312. "value": [
  313. {
  314. "n": "全部",
  315. "v": "0"
  316. },
  317. {
  318. "n": "60分钟以上",
  319. "v": "4"
  320. },
  321. {
  322. "n": "30~60分钟",
  323. "v": "3"
  324. },
  325. {
  326. "n": "10~30分钟",
  327. "v": "2"
  328. },
  329. {
  330. "n": "10分钟以下",
  331. "v": "1"
  332. }
  333. ]
  334. }
  335. ]}
  336. }
  337. header = {}
  338. def localProxy(self,param):
  339. return [200, "video/MP2T", action, ""]