py_czspp.py 9.6 KB


  1. # coding=utf-8
  2. # !/usr/bin/python
import base64
import re
import sys

sys.path.append('..')

from Crypto.Cipher import AES
from requests import session, utils

from base.spider import Spider
  9. class Spider(Spider): # 元类 默认的元类 type
  10. def getName(self):
  11. return "厂长资源"
  12. def init(self, extend=""):
  13. print("============{0}============".format(extend))
  14. pass
  15. def homeContent(self, filter):
  16. result = {}
  17. cateManual = {
  18. "豆瓣电影Top250": "dbtop250",
  19. "最新电影": "zuixindianying",
  20. "电视剧": "dsj",
  21. "国产剧": "gcj",
  22. "美剧": "meijutt",
  23. "韩剧": "hanjutv",
  24. "番剧": "fanju",
  25. "动漫": "dm"
  26. }
  27. classes = []
  28. for k in cateManual:
  29. classes.append({
  30. 'type_name': k,
  31. 'type_id': cateManual[k]
  32. })
  33. result['class'] = classes
  34. return result
  35. def homeVideoContent(self):
  36. url = "https://czspp.com"
  37. if len(self.cookies) <= 0:
  38. self.getCookie(url)
  39. url = url + self.zid
  40. rsp = self.fetch(url)
  41. root = self.html(self.cleanText(rsp.text))
  42. aList = root.xpath("//div[@class='mi_btcon']//ul/li")
  43. videos = []
  44. for a in aList:
  45. name = a.xpath('./a/img/@alt')[0]
  46. pic = a.xpath('./a/img/@data-original')[0]
  47. mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
  48. sid = a.xpath("./a/@href")[0]
  49. sid = self.regStr(sid, "/movie/(\\S+).html")
  50. videos.append({
  51. "vod_id": sid,
  52. "vod_name": name,
  53. "vod_pic": pic,
  54. "vod_remarks": mark
  55. })
  56. result = {
  57. 'list': videos
  58. }
  59. return result
  60. header = {
  61. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"}
  62. cookies = ''
  63. def getCookie(self,url):
  64. rsp = self.fetch(url,headers=self.header)
  65. baseurl = self.regStr(reg=r'(https://.*?/)', src=url)
  66. append = url.replace(baseurl,'')
  67. zid = self.regStr(rsp.text, "{0}(\\S+)\"".format(append))
  68. self.zid = zid
  69. self.cookies = rsp.cookies
  70. if 'btwaf' not in zid:
  71. zid = ''
  72. return rsp.cookies, zid
  73. def categoryContent(self, tid, pg, filter, extend):
  74. result = {}
  75. url = 'https://czspp.com/{0}/page/{1}'.format(tid,pg)
  76. if len(self.cookies) <= 0:
  77. self.getCookie(url)
  78. url = url + self.zid
  79. rsp = self.fetch(url, cookies=self.cookies,headers=self.header)
  80. root = self.html(self.cleanText(rsp.text))
  81. aList = root.xpath("//div[contains(@class,'bt_img mi_ne_kd mrb')]/ul/li")
  82. videos = []
  83. for a in aList:
  84. name = a.xpath('./a/img/@alt')[0]
  85. pic = a.xpath('./a/img/@data-original')[0]
  86. mark = a.xpath("./div[@class='hdinfo']/span/text()")[0]
  87. sid = a.xpath("./a/@href")[0]
  88. sid = self.regStr(sid, "/movie/(\\S+).html")
  89. videos.append({
  90. "vod_id": sid,
  91. "vod_name": name,
  92. "vod_pic": pic,
  93. "vod_remarks": mark
  94. })
  95. result['list'] = videos
  96. result['page'] = pg
  97. result['pagecount'] = 9999
  98. result['limit'] = 90
  99. result['total'] = 999999
  100. return result
  101. def detailContent(self, array):
  102. tid = array[0]
  103. url = 'https://czspp.com/movie/{0}.html'.format(tid)
  104. if len(self.cookies) <= 0:
  105. self.getCookie(url)
  106. url = url + self.zid
  107. rsp = self.fetch(url,cookies=self.cookies,headers=self.header)
  108. root = self.html(self.cleanText(rsp.text))
  109. node = root.xpath("//div[@class='dyxingq']")[0]
  110. pic = node.xpath(".//div[@class='dyimg fl']/img/@src")[0]
  111. title = node.xpath('.//h1/text()')[0]
  112. detail = root.xpath(".//div[@class='yp_context']//p/text()")[0]
  113. vod = {
  114. "vod_id": tid,
  115. "vod_name": title,
  116. "vod_pic": pic,
  117. "type_name": "",
  118. "vod_year": "",
  119. "vod_area": "",
  120. "vod_remarks": "",
  121. "vod_actor": "",
  122. "vod_director": "",
  123. "vod_content": detail
  124. }
  125. infoArray = node.xpath(".//ul[@class='moviedteail_list']/li")
  126. for info in infoArray:
  127. content = info.xpath('string(.)')
  128. if content.startswith('地区'):
  129. tpyeare = ''
  130. for inf in info:
  131. tn = inf.text
  132. tpyeare = tpyeare +'/'+'{0}'.format(tn)
  133. vod['vod_area'] = tpyeare.strip('/')
  134. if content.startswith('年份'):
  135. vod['vod_year'] = content.replace("年份:","")
  136. if content.startswith('主演'):
  137. tpyeact = ''
  138. for inf in info:
  139. tn = inf.text
  140. tpyeact = tpyeact +'/'+'{0}'.format(tn)
  141. vod['vod_actor'] = tpyeact.strip('/')
  142. if content.startswith('导演'):
  143. tpyedire = ''
  144. for inf in info:
  145. tn = inf.text
  146. tpyedire = tpyedire +'/'+'{0}'.format(tn)
  147. vod['vod_director'] = tpyedire .strip('/')
  148. vod_play_from = '$$$'
  149. playFrom = ['厂长']
  150. vod_play_from = vod_play_from.join(playFrom)
  151. vod_play_url = '$$$'
  152. playList = []
  153. vodList = root.xpath("//div[@class='paly_list_btn']")
  154. for vl in vodList:
  155. vodItems = []
  156. aList = vl.xpath('./a')
  157. for tA in aList:
  158. href = tA.xpath('./@href')[0]
  159. name = tA.xpath('./text()')[0].replace('\xa0','')
  160. tId = self.regStr(href, '/v_play/(\\S+).html')
  161. vodItems.append(name + "$" + tId)
  162. joinStr = '#'
  163. joinStr = joinStr.join(vodItems)
  164. playList.append(joinStr)
  165. vod_play_url = vod_play_url.join(playList)
  166. vod['vod_play_from'] = vod_play_from
  167. vod['vod_play_url'] = vod_play_url
  168. result = {
  169. 'list': [
  170. vod
  171. ]
  172. }
  173. return result
  174. def searchContent(self, key, quick):
  175. url = 'https://czspp.com/xssearch?q={0}'.format(key)
  176. if len(self.cookies) <= 0:
  177. self.getCookie(url)
  178. url = url + self.zid
  179. rsp = self.fetch(url,cookies=self.cookies,headers=self.header)
  180. root = self.html(self.cleanText(rsp.text))
  181. vodList = root.xpath("//div[contains(@class,'mi_ne_kd')]/ul/li/a")
  182. videos = []
  183. for vod in vodList:
  184. name = vod.xpath('./img/@alt')[0]
  185. pic = vod.xpath('./img/@data-original')[0]
  186. href = vod.xpath('./@href')[0]
  187. tid = self.regStr(href, 'movie/(\\S+).html')
  188. res = vod.xpath('./div[@class="jidi"]/span/text()')
  189. if len(res) == 0:
  190. remark = '全1集'
  191. else:
  192. remark = vod.xpath('./div[@class="jidi"]/span/text()')[0]
  193. videos.append({
  194. "vod_id": tid,
  195. "vod_name": name,
  196. "vod_pic": pic,
  197. "vod_remarks": remark
  198. })
  199. result = {
  200. 'list': videos
  201. }
  202. return result
  203. config = {
  204. "player": {},
  205. "filter": {}
  206. }
  207. header = {
  208. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
  209. }
  210. def parseCBC(self, enc, key, iv):
  211. keyBytes = key.encode("utf-8")
  212. ivBytes = iv.encode("utf-8")
  213. cipher = AES.new(keyBytes, AES.MODE_CBC, ivBytes)
  214. msg = cipher.decrypt(enc)
  215. paddingLen = msg[len(msg) - 1]
  216. return msg[0:-paddingLen]
  217. def playerContent(self, flag, id, vipFlags):
  218. result = {}
  219. url = 'https://czspp.com/v_play/{0}.html'.format(id)
  220. if len(self.cookies) <= 0:
  221. self.getCookie(url)
  222. url = url + self.zid
  223. pat = '\\"([^\\"]+)\\";var [\\d\\w]+=function dncry.*md5.enc.Utf8.parse\\(\\"([\\d\\w]+)\\".*md5.enc.Utf8.parse\\(([\\d]+)\\)'
  224. rsp = self.fetch(url,cookies=self.cookies,headers=self.header)
  225. html = rsp.text
  226. content = self.regStr(html, pat)
  227. if content == '':
  228. str3 = url
  229. pars = 1
  230. header = {
  231. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
  232. }
  233. else:
  234. key = self.regStr(html, pat, 2)
  235. iv = self.regStr(html, pat, 3)
  236. decontent = self.parseCBC(base64.b64decode(content), key, iv).decode()
  237. urlPat = 'video: \\{url: \\\"([^\\\"]+)\\\"'
  238. vttPat = 'subtitle: \\{url:\\\"([^\\\"]+\\.vtt)\\\"'
  239. str3 = self.regStr(decontent, urlPat)
  240. str4 = self.regStr(decontent, vttPat)
  241. self.loadVtt(str3)
  242. pars = 0
  243. header = ''
  244. if len(str4) > 0:
  245. result['subf'] = '/vtt/utf-8'
  246. result['subt'] = ''
  247. result = {
  248. 'parse': pars,
  249. 'playUrl': '',
  250. 'url': str3,
  251. 'header': header
  252. }
  253. return result
  254. def loadVtt(self, url):
  255. pass
  256. def isVideoFormat(self, url):
  257. pass
  258. def manualVideoCheck(self):
  259. pass
  260. def localProxy(self, param):
  261. action = {}
  262. return [200, "video/MP2T", action, ""]