黑料网.py

#!/usr/bin/python
# coding=utf-8
import sys

sys.path.append('..')  # make the parent directory importable before loading base.spider

import base64
import re

import requests
from bs4 import BeautifulSoup

from base.spider import Spider

# Site root for every request; relative ids from the pages are joined onto this.
xurl = "https://heiliaowang-44.buzz"

headerx = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36',
}
class Spider(Spider):
    global xurl
    global headerx

    def getName(self):
        return "首页"

    def init(self, extend):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass
    def homeContent(self, filter):
        # Build the category list: a fixed set of known channels plus any
        # extra entries scraped from the site's nav bar.
        res = requests.get(xurl, headers=headerx)
        res.encoding = "utf-8"
        doc = BeautifulSoup(res.text, "html.parser")
        sourcediv = doc.find('div', class_='nav')
        vod = sourcediv.find_all('dd')
        # Nav entries that are not video categories and should be skipped.
        string_list = ["首页", "激情图漫", "激情小说",
                       "情色小说", "随机推荐", "顶级资源"]
        result = {}
        result['class'] = [
            {'type_id': "/type/328", 'type_name': "国产视频"},
            {'type_id': "/type/329", 'type_name': "中文字幕"},
            {'type_id': "/type/331", 'type_name': "日本有码"},
            {'type_id': "/type/332", 'type_name': "日本无码"},
            {'type_id': "/type/333", 'type_name': "欧美无码"},
            {'type_id': "/type/334", 'type_name': "强奸乱轮"},
            {'type_id': "/type/335", 'type_name': "制服诱惑"},
            {'type_id': "/type/336", 'type_name': "直播主播"},
            {'type_id': "/type/338", 'type_name': "明星换脸"},
            {'type_id': "/type/339", 'type_name': "抖阴视频"},
            {'type_id': "/type/340", 'type_name': "女优明星"},
            {'type_id': "/type/343", 'type_name': "网爆门"},
            {'type_id': "/type/345", 'type_name': "伦理三级"},
            {'type_id': "/type/346", 'type_name': "AV解说"},
            {'type_id': "/type/347", 'type_name': "SM调教"},
            {'type_id': "/type/348", 'type_name': "萝莉少女"},
            {'type_id': "/type/349", 'type_name': "极品媚黑"},
            {'type_id': "/type/350", 'type_name': "女同性恋"},
            {'type_id': "/type/351", 'type_name': "玩偶姐姐"},
            {'type_id': "/type/353", 'type_name': "人妖系列"},
            {'type_id': "/type/373", 'type_name': "韩国主播"},
            {'type_id': "/type/378", 'type_name': "VR视角"},
        ]
        # Append any nav categories not already covered by the fixed list.
        for item in vod:
            name = item.find('a').text
            if name in string_list:
                continue
            id = item.find('a')['href']
            id = id.replace(".html", "")
            result['class'].append({'type_id': id, 'type_name': name})
        return result
    def homeVideoContent(self):
        # Scrape the recommended-video grid on the home page.
        videos = []
        try:
            res = requests.get(xurl, headers=headerx)
            res.encoding = "utf-8"
            doc = BeautifulSoup(res.text, "html.parser")
            sourcediv = doc.find_all('div', class_='pic')
            for vod in sourcediv:
                ul_elements = vod.find_all('ul')
                for item in ul_elements:
                    name = item.select_one("li a")['title']
                    pic = item.select_one("li a img")["data-src"]
                    remark = item.select_one("li a span").text
                    id = item.select_one("li a")['href']
                    video = {
                        "vod_id": id,
                        "vod_name": name,
                        "vod_pic": pic,
                        "vod_remarks": remark
                    }
                    videos.append(video)
        except Exception:
            pass
        result = {'list': videos}
        return result
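    # Note: the home grid above serves covers through the lazy-load "data-src"
    # attribute, while the category and search grids below expose the image
    # URL directly in "src".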
    def categoryContent(self, cid, pg, filter, ext):
        # Category pages are paginated as <cid>/<page>.html.
        result = {}
        videos = []
        if not pg:
            pg = 1
        url = xurl + cid + "/" + str(pg) + ".html"
        detail = requests.get(url=url, headers=headerx)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "html.parser")
        sourcediv = doc.find_all('div', class_='pic')
        for vod in sourcediv:
            ul_elements = vod.find_all('ul')
            for item in ul_elements:
                name = item.select_one("li a")['title']
                pic = item.select_one("li a img")["src"]
                remark = item.select_one("li a span").text
                id = item.select_one("li a")['href']
                video = {
                    "vod_id": id,
                    "vod_name": name,
                    "vod_pic": pic,
                    "vod_remarks": remark
                }
                videos.append(video)
        result['list'] = videos
        result['page'] = pg
        # The site exposes no total count, so report a large page count and
        # let the client stop when a page comes back empty.
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result
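    # Illustrative only: with cid "/type/328" and pg 2, the request above
    # targets "https://heiliaowang-44.buzz/type/328/2.html".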
    def detailContent(self, ids):
        did = ids[0]
        videos = []
        result = {}
        res = requests.get(url=xurl + did, headers=headerx)
        res.encoding = "utf-8"
        doc = BeautifulSoup(res.text, "html.parser")
        # The play-source links sit in an inline-styled container on the detail page.
        sourcediv = doc.find('div', style='padding-bottom: 10px;')
        vod = sourcediv.find_all('a')
        play_from = ""
        play_url = ""
        # Join source names and hrefs with the "$$$" separator the client expects.
        for item in vod:
            play_from = play_from + item.text + "$$$"
            play_url = play_url + item['href'] + "$$$"
        # Trim trailing "$$$" separators and stray "#" placeholder hrefs.
        while play_url and play_url[-1] in "#$":
            play_url = play_url[:-1]
        while play_from and play_from[-1] in "#$":
            play_from = play_from[:-1]
        # The page <title> reads "<name>的播放地址"; pull the title out of it.
        source_match = re.search(r"<title>(.*?)的播放地址", res.text)
        if source_match:
            tx = source_match.group(1)
            videos.append({
                "vod_id": did,
                "vod_name": tx,
                "vod_pic": "",
                "type_name": "ぃぅおか🍬 คิดถึง",
                "vod_year": "",
                "vod_area": "",
                "vod_remarks": "",
                "vod_actor": "",
                "vod_director": "",
                "vod_content": "",
                "vod_play_from": play_from,
                "vod_play_url": play_url
            })
        result['list'] = videos
        return result
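    # Illustrative only: with two play sources the joined fields come out as,
    # e.g., vod_play_from = "线路1$$$线路2" and
    # vod_play_url = "/play/xxx-1.html$$$/play/xxx-2.html"; the actual names
    # and hrefs depend on the page, only the "$$$" separator is fixed.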
    def playerContent(self, flag, id, vipFlags):
        result = {}
        res = requests.get(url=xurl + id, headers=headerx)
        res.encoding = "utf-8"
        decoded_str = ''
        if '"rid"' in res.text:
            # Newer player pages embed a resource id ("rid") that must be
            # exchanged for the real stream URL via a POST to /fetchPlayUrl3.
            source_match3 = re.search(r'"rid" : "(.*?)"', res.text)
            if source_match3:
                id = source_match3.group(1)
                data = "rid=" + id
                header = {
                    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36",
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
                }
                # Retry the resolver a few times rather than looping indefinitely.
                for _ in range(5):
                    res2 = requests.post(url=xurl + "/fetchPlayUrl3", headers=header, data=data)
                    source_match4 = re.search(r'"returnData"\s*:\s*"([^"]+)"', res2.text)
                    if source_match4:
                        decoded_str = source_match4.group(1)
                        break
        else:
            # Older pages inline the stream URL; "aHR0c" marks a base64-encoded
            # "http..." fragment whose '=' padding has been stripped.
            source_match = re.search(r"http:(.*?)\.m3u8", res.text)
            if source_match:
                str3 = source_match.group(1)
                if "aHR0c" in str3:
                    padding_needed = len(str3) % 4
                    if padding_needed:
                        str3 += '=' * (4 - padding_needed)
                    decoded_str = base64.b64decode(str3).decode("utf-8")
            if not decoded_str:
                # Fall back to a quoted "....m3u8" literal in the page script.
                source_match2 = re.search(r"'(.*?)\.m3u8';", res.text)
                if source_match2:
                    decoded_str = source_match2.group(1) + ".m3u8"
        result["parse"] = 0
        result["playUrl"] = ''
        result["url"] = decoded_str
        result["header"] = headerx
        return result
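    # Worked example of the padding step above (illustrative): a stripped
    # fragment such as "aHR0cHM6Ly9hYg" has length 14, and 14 % 4 == 2, so
    # 4 - 2 == 2 '=' characters are appended, giving "aHR0cHM6Ly9hYg==",
    # which base64-decodes to "https://ab".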
    def searchContent(self, key, quick):
        return self.searchContentPage(key, quick, '1')

    def searchContentPage(self, key, quick, page):
        result = {}
        videos = []
        if not page:
            page = 1
        # Search results are paginated as /search/<key>/n/<page>.html.
        url = xurl + "/search/" + key + "/n/" + str(page) + ".html"
        detail = requests.get(url=url, headers=headerx)
        detail.encoding = "utf-8"
        doc = BeautifulSoup(detail.text, "html.parser")
        sourcediv = doc.find_all('div', class_='pic')
        for vod in sourcediv:
            ul_elements = vod.find_all('ul')
            for item in ul_elements:
                name = item.select_one("li a")['title']
                pic = item.select_one("li a img")["src"]
                remark = item.select_one("li a span").text
                id = item.select_one("li a")['href']
                video = {
                    "vod_id": id,
                    "vod_name": name,
                    "vod_pic": pic,
                    "vod_remarks": remark
                }
                videos.append(video)
        result['list'] = videos
        result['page'] = page
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        return result
    def localProxy(self, params):
        # Dispatch local-proxy requests by type; the proxyM3u8/proxyMedia/proxyTs
        # helpers are expected to come from the base Spider class.
        if params['type'] == "m3u8":
            return self.proxyM3u8(params)
        elif params['type'] == "media":
            return self.proxyMedia(params)
        elif params['type'] == "ts":
            return self.proxyTs(params)
        return None
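
# A minimal local smoke test (a sketch, not part of the original plugin),
# assuming the usual TVBox/CatVod layout in which base.spider is importable
# from the parent directory and the site is reachable.
if __name__ == '__main__':
    spider = Spider()
    spider.init('')
    home = spider.homeContent(False)
    print([c['type_name'] for c in home['class'][:5]])
    print(len(spider.homeVideoContent()['list']))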