# coding=utf-8
# !/usr/bin/python
# py_czspp.py
import base64
import hashlib
import sys
import urllib
import urllib.parse

# Extend the import path before the third-party and project imports,
# matching the original statement order.
sys.path.append('..')

import requests
from Crypto.Cipher import AES

from base.spider import Spider
class Spider(Spider):  # subclass of the imported Spider; metaclass is the default, type
  def getName(self):
    """Return the display name of this source."""
    source_name = "厂长资源"
    return source_name
  def init(self, extend=""):
    """Print ``extend`` framed by twelve ``=`` on each side; nothing else is set up.

    Args:
      extend: Value interpolated into the banner (defaults to an empty string).
    """
    banner = "============{0}============".format(extend)
    print(banner)
  def homeContent(self, filter):
    """Return the fixed category list of the site.

    Args:
      filter: Accepted but not used by this implementation.

    Returns:
      ``{'class': [...]}`` where every entry is a dict with ``type_name``
      (label) and ``type_id`` (URL slug), in declaration order.
    """
    # (label, slug) pairs, kept in display order.
    categories = (
      ("豆瓣电影Top250", "dbtop250"),
      ("最新电影", "zuixindianying"),
      ("电视剧", "dsj"),
      ("国产剧", "gcj"),
      ("美剧", "meijutt"),
      ("韩剧", "hanjutv"),
      ("番剧", "fanju"),
      ("动漫", "dm"),
    )
    return {
      'class': [
        {'type_name': label, 'type_id': slug}
        for label, slug in categories
      ]
    }
  def homeVideoContent(self):
    """Scrape the site's front page and return the videos listed there.

    Returns:
      ``{'list': [...]}`` where each entry has ``vod_id``, ``vod_name``,
      ``vod_pic`` and ``vod_remarks``. ``vod_remarks`` is an empty string
      when the item has no remark span.
    """
    url = "https://czspp.com"
    # getCookie() accepts only the URL and already returns the final
    # response. The previous code passed an extra header argument
    # (TypeError) and then used the return value as if it were a session.
    rsp = self.getCookie(url)
    root = self.html(self.cleanText(rsp.text))
    videos = []
    for item in root.xpath("//div[@class='mi_btcon']//ul/li"):
      remarks = item.xpath("./div[@class='hdinfo']/span/text()")
      href = item.xpath("./a/@href")[0]
      videos.append({
        "vod_id": self.regStr(href, "/movie/(\\S+).html"),
        "vod_name": item.xpath('./a/img/@alt')[0],
        "vod_pic": item.xpath('./a/img/@data-original')[0],
        # Do not abort the whole page when one item lacks a remark.
        "vod_remarks": remarks[0] if remarks else ''
      })
    return {'list': videos}
  def getCookie(self, url):
    """Fetch ``url`` in a fresh session, answering the site's challenge pages.

    The first request is sent without custom headers. Depending on the
    text of that response, one of three things happens:

    * it contains '人机验证' ("human verification"): the linked script is
      downloaded, its ``key`` and ``value`` are extracted, ``value`` is
      turned into the MD5 of its concatenated character codes and sent to
      the verification endpoint, then ``url`` is requested again;
    * it contains '检测中' ("checking"): the link found in the page is
      visited, then ``url`` is requested again;
    * otherwise the first response is returned unchanged.

    Args:
      url: Page to fetch.

    Returns:
      The response object of the last request made for ``url``.
    """
    header = {
      "Referer": 'https://czspp.com/',
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
    }
    session = requests.session()
    rsp = session.get(url)
    page = rsp.text

    if '人机验证' in page:
      script_path = self.regStr(page, 'src=\"(/.*?)\"')
      nrsp = session.get('https://czspp.com' + script_path, headers=header)
      key = self.regStr(nrsp.text, 'var key=\"(.*?)\"')
      avalue = self.regStr(nrsp.text, 'value=\"(.*?)\"')
      # Decimal code points of every character, glued together, then hashed.
      codes = ''.join(str(ord(ch)) for ch in avalue)
      value = hashlib.md5(codes.encode()).hexdigest()
      session.get('https://czspp.com/a20be899_96a6_40b2_88ba_32f1f75f1552_yanzheng_ip.php?type=96c4e20a0e951f471d32dae103e83881&key={0}&value={1}'.format(key, value), headers=header)
      return session.get(url, headers=header)

    if '检测中' in page:
      redirect_path = self.regStr(page, 'href =\"(/.*?)\"')
      session.get('https://czspp.com{0}'.format(redirect_path), headers=header)
      return session.get(url, headers=header)

    return rsp
  def categoryContent(self, tid, pg, filter, extend):
    """Scrape one page of a category listing.

    Args:
      tid: Category slug placed in the URL path.
      pg: Page number placed in the URL path; echoed back as ``page``.
      filter: Not used.
      extend: Not used.

    Returns:
      A dict with ``list`` (entries carrying ``vod_id``, ``vod_name``,
      ``vod_pic``, ``vod_remarks``) plus the hard-coded paging values
      ``pagecount`` 9999, ``limit`` 90 and ``total`` 999999.
    """
    url = 'https://czspp.com/{0}/page/{1}'.format(tid, pg)
    rsp = self.getCookie(url)
    root = self.html(self.cleanText(rsp.text))
    videos = []
    for item in root.xpath("//div[contains(@class,'bt_img mi_ne_kd mrb')]/ul/li"):
      # Prefer the episode badge, fall back to the quality badge. An item
      # with neither used to raise IndexError and lose the whole page; it
      # now gets an empty remark instead.
      marks = (item.xpath(".//div[@class='jidi']/span/text()")
               or item.xpath("./div[@class='hdinfo']/span/text()"))
      href = item.xpath("./a/@href")[0]
      videos.append({
        "vod_id": self.regStr(href, "/movie/(\\S+).html"),
        "vod_name": item.xpath('./a/img/@alt')[0],
        "vod_pic": item.xpath('./a/img/@data-original')[0],
        "vod_remarks": marks[0] if marks else ''
      })
    return {
      'list': videos,
      'page': pg,
      'pagecount': 9999,
      'limit': 90,
      'total': 999999
    }
  def detailContent(self, array):
    """Scrape the detail page of one video.

    Args:
      array: Sequence whose first element is the video id used in the URL.

    Returns:
      ``{'list': [vod]}`` where ``vod`` holds the id, title, poster,
      synopsis, year, area, actors, director and the play list.
      ``vod_play_url`` joins the episodes of one play group with ``#`` (each
      written as ``name$episode_id``) and the groups with ``$$$``.

    Raises:
      IndexError: If the detail container, poster, title or an episode
        link/label is missing from the page.
    """
    tid = array[0]
    url = 'https://czspp.com/movie/{0}.html'.format(tid)
    rsp = self.getCookie(url)
    root = self.html(self.cleanText(rsp.text))
    node = root.xpath("//div[@class='dyxingq']")[0]
    pic = node.xpath(".//div[@class='dyimg fl']/img/@src")[0]
    title = node.xpath('.//h1/text()')[0]
    # A page without a synopsis paragraph used to raise IndexError here.
    paragraphs = root.xpath(".//div[@class='yp_context']//p/text()")
    detail = paragraphs[0] if paragraphs else ''
    vod = {
      "vod_id": tid,
      "vod_name": title,
      "vod_pic": pic,
      "type_name": "",
      "vod_year": "",
      "vod_area": "",
      "vod_remarks": "",
      "vod_actor": "",
      "vod_director": "",
      "vod_content": detail
    }

    def child_texts(entry):
      # Slash-join the text of the entry's child elements. Children without
      # text are skipped; formatting them used to insert the literal 'None'.
      return '/'.join(child.text for child in entry if child.text).strip('/')

    # Label prefix -> vod field filled from the entry's child elements
    # ('地区' area, '主演' starring, '导演' director).
    joined_fields = (
      ('地区', 'vod_area'),
      ('主演', 'vod_actor'),
      ('导演', 'vod_director'),
    )
    for info in node.xpath(".//ul[@class='moviedteail_list']/li"):
      content = info.xpath('string(.)')
      if content.startswith('年份'):
        # '年份' is the year label; the value is the rest of the entry text.
        vod['vod_year'] = content.replace("年份:", "")
        continue
      for prefix, field in joined_fields:
        if content.startswith(prefix):
          vod[field] = child_texts(info)
          break

    playFrom = ['厂长']
    playList = []
    for group in root.xpath("//div[@class='paly_list_btn']"):
      vodItems = []
      for link in group.xpath('./a'):
        href = link.xpath('./@href')[0]
        # Drop non-breaking spaces from the episode label.
        name = link.xpath('./text()')[0].replace('\xa0', '')
        episode_id = self.regStr(href, '/v_play/(\\S+).html')
        vodItems.append(name + "$" + episode_id)
      playList.append('#'.join(vodItems))
    vod['vod_play_from'] = '$$$'.join(playFrom)
    vod['vod_play_url'] = '$$$'.join(playList)
    return {'list': [vod]}
  def searchContent(self, key, quick):
    """Search the site for ``key`` and return the matching videos.

    Args:
      key: Search keyword; percent-encoded before it is put in the URL.
      quick: Not used.

    Returns:
      ``{'list': [...]}`` with ``vod_id``, ``vod_name``, ``vod_pic`` and
      ``vod_remarks`` per hit. Hits without an episode badge get the
      remark '全1集' ("1 episode in total").
    """
    # urllib.parse is a submodule and is imported explicitly at file level;
    # a bare "import urllib" does not guarantee it is loaded.
    url = 'https://czspp.com/xssearch?q={0}'.format(urllib.parse.quote(key))
    rsp = self.getCookie(url)
    root = self.html(self.cleanText(rsp.text))
    videos = []
    for vod in root.xpath("//div[contains(@class,'mi_ne_kd')]/ul/li/a"):
      href = vod.xpath('./@href')[0]
      # Evaluate the badge XPath once instead of twice.
      badge = vod.xpath('./div[@class="jidi"]/span/text()')
      videos.append({
        "vod_id": self.regStr(href, 'movie/(\\S+).html'),
        "vod_name": vod.xpath('./img/@alt')[0],
        "vod_pic": vod.xpath('./img/@data-original')[0],
        "vod_remarks": badge[0] if badge else '全1集'
      })
    return {'list': videos}
  # Class-level configuration with two sections, both empty.
  config = {"player": {}, "filter": {}}
  # Class-level request headers (Referer plus a desktop Chrome User-Agent).
  # The methods visible in this file build their own local header dicts and
  # do not read this attribute.
  header = {
    "Referer": "https://czspp.com/",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36",
  }
  def parseCBC(self, enc, key, iv):
    """Decrypt ``enc`` with AES in CBC mode and cut off the trailing padding.

    Args:
      enc: Ciphertext bytes.
      key: Key as text; encoded to UTF-8 before use.
      iv: Initialisation vector as text; encoded to UTF-8 before use.

    Returns:
      The decrypted bytes with the last N bytes removed, where N is the
      numeric value of the final decrypted byte.
    """
    decryptor = AES.new(key.encode("utf-8"), AES.MODE_CBC, iv.encode("utf-8"))
    plain = decryptor.decrypt(enc)
    # The last byte states how many padding bytes were appended.
    pad_len = plain[-1]
    return plain[:-pad_len]
  def playerContent(self, flag, id, vipFlags):
    """Resolve the playable URL for one episode.

    The play page is fetched and searched for an encrypted player config.
    If none is found, the page URL itself is returned with ``parse`` set to
    1. Otherwise the config is base64-decoded and AES-CBC-decrypted, and the
    video URL is read from it and returned with ``parse`` set to 0.

    Args:
      flag: Not used.
      id: Episode id placed in the play-page URL.
      vipFlags: Not used.

    Returns:
      A dict with ``parse``, ``playUrl``, ``url`` and ``header``; when the
      decrypted config names a ``.vtt`` subtitle, ``subf`` and ``subt`` are
      included as well.
    """
    url = 'https://czspp.com/v_play/{0}.html'.format(id)
    rsp = self.getCookie(url)
    pat = '\\"([^\\"]+)\\";var [\\d\\w]+=function dncry.*md5.enc.Utf8.parse\\(\\"([\\d\\w]+)\\".*md5.enc.Utf8.parse\\(([\\d]+)\\)'
    html = rsp.text
    content = self.regStr(html, pat)
    # Bound on both paths so the subtitle check below can never hit an
    # unassigned name.
    subtitle_url = ''
    if content == '':
      play_url = url
      pars = 1
      header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
      }
    else:
      key = self.regStr(html, pat, 2)
      iv = self.regStr(html, pat, 3)
      decontent = self.parseCBC(base64.b64decode(content), key, iv).decode()
      urlPat = 'video: \\{url: \\\"([^\\\"]+)\\\"'
      vttPat = 'subtitle: \\{url:\\\"([^\\\"]+\\.vtt)\\\"'
      play_url = self.regStr(decontent, urlPat)
      subtitle_url = self.regStr(decontent, vttPat)
      self.loadVtt(play_url)
      pars = 0
      header = ''
    result = {
      'parse': pars,
      'playUrl': '',
      'url': play_url,
      'header': header
    }
    # The subtitle keys used to be written before ``result`` was rebuilt, so
    # they never reached the caller; they are now added afterwards.
    # NOTE(review): 'subt' is still the empty string the original assigned,
    # not the extracted subtitle URL - confirm what the player expects.
    if subtitle_url:
      result['subf'] = '/vtt/utf-8'
      result['subt'] = ''
    return result
  def loadVtt(self, url):
    """Do nothing with ``url``; placeholder that always returns ``None``."""
    return None
  def isVideoFormat(self, url):
    """Do nothing with ``url``; placeholder that always returns ``None``."""
    return None
  def manualVideoCheck(self):
    """Placeholder with no behaviour; always returns ``None``."""
    return None
  def localProxy(self, param):
    """Return the fixed reply ``[200, "video/MP2T", {}, ""]``.

    ``param`` is ignored. Presumably the four items are status code,
    content type, action map and body - confirm against the caller.
    """
    return [200, "video/MP2T", {}, ""]