# 厂长资源.py
# coding=utf-8
# !/usr/bin/python
import base64
import hashlib
import json
import sys
import urllib
import urllib.parse

import requests
from Crypto.Cipher import AES

sys.path.append('..')
try:
    # from base.spider import Spider as BaseSpider
    from base.spider import BaseSpider
except ImportError:
    from t4.base.spider import BaseSpider
# import re
# from base.htmlParser import jsoup
  18. class Spider(BaseSpider): # 元类 默认的元类 type
  19. def getName(self):
  20. return "厂长资源"
  21. def init(self, extend=""):
  22. print("============{0}============".format(extend))
  23. pass
  24. def homeContent(self, filter):
  25. result = {}
  26. cateManual = {
  27. "豆瓣电影Top250": "dbtop250",
  28. "最新电影": "zuixindianying",
  29. "电视剧": "dsj",
  30. "国产剧": "gcj",
  31. "美剧": "meijutt",
  32. "韩剧": "hanjutv",
  33. "番剧": "fanju",
  34. "动漫": "dm"
  35. }
  36. classes = []
  37. for k in cateManual:
  38. classes.append({
  39. 'type_name': k,
  40. 'type_id': cateManual[k]
  41. })
  42. result['class'] = classes
  43. return result
  44. def homeVideoContent(self):
  45. url = "https://www.czys.top"
  46. header = {
  47. "Connection": "keep-alive",
  48. "Referer": url,
  49. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
  50. }
  51. rsp = self.getCookie(url)
  52. root = self.html(self.cleanText(rsp.text))
  53. aList = root.xpath("//div[contains(@class,'leibox')]//ul/li")
  54. videos = []
  55. for a in aList:
  56. name = a.xpath('./a/img/@alt')[0]
  57. pic = a.xpath('./a/img/@data-original')[0]
  58. mark = ''.join(a.xpath(".//*[@class='hdinfo']//span/text()"))
  59. sid = a.xpath("./a/@href")[0]
  60. sid = self.regStr(sid, "/movie/(\\S+).html")
  61. videos.append({
  62. "vod_id": sid,
  63. "vod_name": name,
  64. "vod_pic": pic,
  65. "vod_remarks": mark
  66. })
  67. result = {
  68. 'list': videos
  69. }
  70. return result
  71. def getCookie(self, url):
  72. header = {
  73. "Referer": 'https://www.czys.top/',
  74. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36"
  75. }
  76. session = requests.session()
  77. rsp = session.get(url)
  78. if '人机验证' in rsp.text:
  79. append = self.regStr(rsp.text, 'src=\"(/.*?)\"')
  80. nurl = 'https://www.czys.top' + append
  81. nrsp = session.get(nurl, headers=header)
  82. key = self.regStr(nrsp.text, 'var key=\"(.*?)\"')
  83. avalue = self.regStr(nrsp.text, 'value=\"(.*?)\"')
  84. c = ''
  85. for i in range(0, len(avalue)):
  86. a = avalue[i]
  87. b = ord(a)
  88. c = c + str(b)
  89. value = hashlib.md5(c.encode()).hexdigest()
  90. session.get(
  91. 'https://www.czys.top/a20be899_96a6_40b2_88ba_32f1f75f1552_yanzheng_ip.php?type=96c4e20a0e951f471d32dae103e83881&key={0}&value={1}'.format(
  92. key, value), headers=header)
  93. return session.get(url, headers=header)
  94. elif '检测中' in rsp.text:
  95. append = self.regStr(rsp.text, 'href =\"(/.*?)\"')
  96. session.get('https://www.czys.top{0}'.format(append), headers=header)
  97. return session.get(url, headers=header)
  98. else:
  99. return rsp
  100. def categoryContent(self, tid, pg, filter, extend):
  101. result = {}
  102. url = 'https://www.czys.top/{0}/page/{1}'.format(tid, pg)
  103. rsp = self.getCookie(url)
  104. root = self.html(self.cleanText(rsp.text))
  105. aList = root.xpath("//div[contains(@class,'bt_img mi_ne_kd mrb')]/ul/li")
  106. videos = []
  107. for a in aList:
  108. name = a.xpath('./a/img/@alt')[0]
  109. pic = a.xpath('./a/img/@data-original')[0]
  110. mark = ''.join(a.xpath(".//div[@class='jidi']//span/text()"))
  111. if not mark:
  112. mark = ''.join(a.xpath("./div[@class='hdinfo']//span/text()"))
  113. sid = a.xpath("./a/@href")[0]
  114. sid = self.regStr(sid, "/movie/(\\S+).html")
  115. videos.append({
  116. "vod_id": sid,
  117. "vod_name": name,
  118. "vod_pic": pic,
  119. "vod_remarks": mark
  120. })
  121. result['list'] = videos
  122. result['page'] = pg
  123. result['pagecount'] = 9999
  124. result['limit'] = 90
  125. result['total'] = 999999
  126. return result
  127. def detailContent(self, array):
  128. tid = array[0]
  129. url = 'https://www.czys.top/movie/{0}.html'.format(tid)
  130. rsp = self.getCookie(url)
  131. root = self.html(self.cleanText(rsp.text))
  132. node = root.xpath("//div[@class='dyxingq']")[0]
  133. pic = node.xpath(".//div[@class='dyimg fl']/img/@src")[0]
  134. title = node.xpath('.//h1/text()')[0]
  135. detail = root.xpath(".//div[@class='yp_context']//p/text()")[0]
  136. vod = {
  137. "vod_id": tid,
  138. "vod_name": title,
  139. "vod_pic": pic,
  140. "type_name": "",
  141. "vod_year": "",
  142. "vod_area": "",
  143. "vod_remarks": "",
  144. "vod_actor": "",
  145. "vod_director": "",
  146. "vod_content": detail
  147. }
  148. infoArray = node.xpath(".//ul[@class='moviedteail_list']/li")
  149. for info in infoArray:
  150. content = info.xpath('string(.)')
  151. if content.startswith('地区'):
  152. tpyeare = ''
  153. for inf in info:
  154. tn = inf.text
  155. tpyeare = tpyeare + '/' + '{0}'.format(tn)
  156. vod['vod_area'] = tpyeare.strip('/')
  157. if content.startswith('年份'):
  158. vod['vod_year'] = content.replace("年份:", "")
  159. if content.startswith('主演'):
  160. tpyeact = ''
  161. for inf in info:
  162. tn = inf.text
  163. tpyeact = tpyeact + '/' + '{0}'.format(tn)
  164. vod['vod_actor'] = tpyeact.strip('/')
  165. if content.startswith('导演'):
  166. tpyedire = ''
  167. for inf in info:
  168. tn = inf.text
  169. tpyedire = tpyedire + '/' + '{0}'.format(tn)
  170. vod['vod_director'] = tpyedire.strip('/')
  171. vod_play_from = '$$$'
  172. playFrom = ['厂长']
  173. vod_play_from = vod_play_from.join(playFrom)
  174. vod_play_url = '$$$'
  175. playList = []
  176. vodList = root.xpath("//div[@class='paly_list_btn']")
  177. for vl in vodList:
  178. vodItems = []
  179. aList = vl.xpath('./a')
  180. for tA in aList:
  181. href = tA.xpath('./@href')[0]
  182. name = tA.xpath('./text()')[0].replace('\xa0', '')
  183. tId = self.regStr(href, '/v_play/(\\S+).html')
  184. vodItems.append(name + "$" + tId)
  185. joinStr = '#'
  186. joinStr = joinStr.join(vodItems)
  187. playList.append(joinStr)
  188. vod_play_url = vod_play_url.join(playList)
  189. vod['vod_play_from'] = vod_play_from
  190. vod['vod_play_url'] = vod_play_url
  191. result = {
  192. 'list': [
  193. vod
  194. ]
  195. }
  196. return result
  197. def searchContent(self, wd, quick=False, pg=1):
  198. url = 'https://www.czys.top/daoyongjiekoshibushiyoubing?q={0}'.format(urllib.parse.quote(wd))
  199. rsp = self.getCookie(url)
  200. root = self.html(self.cleanText(rsp.text))
  201. vodList = root.xpath("//div[contains(@class,'mi_ne_kd')]/ul/li/a")
  202. videos = []
  203. for vod in vodList:
  204. name = vod.xpath('./img/@alt')[0]
  205. pic = vod.xpath('./img/@data-original')[0]
  206. href = vod.xpath('./@href')[0]
  207. tid = self.regStr(href, 'movie/(\\S+).html')
  208. res = vod.xpath('./div[@class="jidi"]/span/text()')
  209. if len(res) == 0:
  210. remark = '全1集'
  211. else:
  212. remark = vod.xpath('./div[@class="jidi"]/span/text()')[0]
  213. videos.append({
  214. "vod_id": tid,
  215. "vod_name": name,
  216. "vod_pic": pic,
  217. "vod_remarks": remark
  218. })
  219. result = {
  220. 'list': videos
  221. }
  222. return result
  223. config = {
  224. "player": {},
  225. "filter": {}
  226. }
  227. header = {
  228. "Referer": "https://www.czys.top/",
  229. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
  230. }
  231. def parseCBC(self, enc, key, iv):
  232. keyBytes = key.encode("utf-8")
  233. ivBytes = iv.encode("utf-8")
  234. cipher = AES.new(keyBytes, AES.MODE_CBC, ivBytes)
  235. msg = cipher.decrypt(enc)
  236. paddingLen = msg[len(msg) - 1]
  237. return msg[0:-paddingLen]
  238. def playerContent(self, flag, id, vipFlags):
  239. result = {}
  240. url = 'https://www.czys.top/v_play/{0}.html'.format(id)
  241. rsp = self.getCookie(url)
  242. pat = '\\"([^\\"]+)\\";var [\\d\\w]+=function dncry.*md5.enc.Utf8.parse\\(\\"([\\d\\w]+)\\".*md5.enc.Utf8.parse\\(([\\d]+)\\)'
  243. html = rsp.text
  244. print(html)
  245. content = self.regStr(html, pat)
  246. if content == '':
  247. url = self.regStr(reg='<iframe.*?src=\"(.*?)\".*?</iframe>', src=html)
  248. config = self.fetch(url).text
  249. # jsp=jsoup()
  250. # url=jsp.pdfh(html, "body&&iframe&&src")
  251. # self.log(url)
  252. # config=jsp.pdfh(self.fetch(url).text,'body&&script&&Html')
  253. # self.log(config)
  254. player = self.regStr(reg='var rand = \"(.*?)\".*var player = \"(.*?)\"', src=config.replace('\n', ''),
  255. group=2)
  256. rand = self.regStr(reg='var rand = \"(.*?)\".*var player = \"(.*?)\"', src=config.replace('\n', ''),
  257. group=1)
  258. decontent = self.parseCBC(base64.b64decode(player), 'VFBTzdujpR9FWBhe', rand).decode()
  259. str3 = json.loads(decontent)['url']
  260. pars = 0
  261. header = ''
  262. else:
  263. key = self.regStr(html, pat, 2)
  264. iv = self.regStr(html, pat, 3)
  265. decontent = self.parseCBC(base64.b64decode(content), key, iv).decode()
  266. urlPat = 'video: \\{url: \\\"([^\\\"]+)\\\"'
  267. vttPat = 'subtitle: \\{url:\\\"([^\\\"]+\\.vtt)\\\"'
  268. str3 = self.regStr(decontent, urlPat)
  269. str4 = self.regStr(decontent, vttPat)
  270. self.loadVtt(str3)
  271. pars = 0
  272. header = ''
  273. if len(str4) > 0:
  274. result['subf'] = '/vtt/utf-8'
  275. result['subt'] = ''
  276. result = {
  277. 'parse': pars,
  278. 'playUrl': '',
  279. 'url': str3,
  280. 'header': header
  281. }
  282. return result
  283. def loadVtt(self, url):
  284. pass
  285. def isVideoFormat(self, url):
  286. pass
  287. def manualVideoCheck(self):
  288. pass
  289. def localProxy(self, param):
  290. action = {}
  291. return [200, "video/MP2T", action, ""]