cms.py 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. # File : cms.py
  4. # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
  5. # Date : 2022/8/25
  6. import json
  7. # import bs4
  8. import requests
  9. import re
  10. import math
  11. import ujson
  12. from utils.web import *
  13. from utils.system import getHost
  14. from utils.config import playerConfig
  15. from utils.log import logger
  16. from utils.encode import base64Encode,base64Decode,fetch,post,request,getCryptoJS,getPreJs,buildUrl,getHome,atob,btoa
  17. from utils.encode import verifyCode,setDetail,join,urljoin2,parseText,requireCache,forceOrder,base64ToImage,encodeStr, decodeStr
  18. from utils.encode import md5 as mmd5
  19. from utils.safePython import safePython
  20. from utils.parser import runPy,runJScode,JsObjectWrapper,PyJsObject,PyJsString
  21. from utils.htmlParser import jsoup
  22. from urllib.parse import urljoin,quote,unquote
  23. from concurrent.futures import ThreadPoolExecutor # 引入线程池
  24. from flask import url_for,redirect,render_template_string
  25. from easydict import EasyDict as edict
  26. from controllers.service import storage_service
  27. def setItem(key,value):
  28. lsg = storage_service()
  29. if isinstance(key,PyJsString):
  30. key = parseText(str(key))
  31. if isinstance(value,PyJsString):
  32. value = parseText(str(value))
  33. return lsg.setItem(key,value)
  34. def getItem(key,value=''):
  35. lsg = storage_service()
  36. if isinstance(key,PyJsString):
  37. key = parseText(str(key))
  38. if isinstance(value,PyJsString):
  39. value = parseText(str(value))
  40. return lsg.getItem(key,value)
  41. def clearItem(key):
  42. lsg = storage_service()
  43. if isinstance(key,PyJsString):
  44. key = parseText(str(key))
  45. return lsg.clearItem(key)
  46. def encodeUrl(url):
  47. # return base64Encode(quote(url))
  48. # return base64Encode(url)
  49. # print(type(url))
  50. if isinstance(url,PyJsString):
  51. # obj = obj.to_dict()
  52. url = parseText(str(url))
  53. return quote(url)
  54. def stringify(obj):
  55. if isinstance(obj,PyJsObject):
  56. # obj = obj.to_dict()
  57. obj = parseText(str(obj))
  58. return json.dumps(obj, separators=(',', ':'), ensure_ascii=False)
  59. def requireObj(url):
  60. if isinstance(url,PyJsString):
  61. url = parseText(str(url))
  62. return requireCache(url)
  63. def md5(text):
  64. if isinstance(text,PyJsString):
  65. text = parseText(str(text))
  66. return mmd5(text)
# Shared execution context injected into embedded rule scripts: every key
# becomes a global the script can call — network helpers (requests/fetch/post),
# codecs (base64*/md5/atob/btoa), persistent storage (setItem/getItem/clearItem),
# user-agent constants and misc utilities.
py_ctx = {
'requests':requests,'print':print,'base64Encode':base64Encode,'base64Decode':base64Decode,
'log':logger.info,'fetch':fetch,'post':post,'request':request,'getCryptoJS':getCryptoJS,
'buildUrl':buildUrl,'getHome':getHome,'setDetail':setDetail,'join':join,'urljoin2':urljoin2,
'PC_UA':PC_UA,'MOBILE_UA':MOBILE_UA,'UC_UA':UC_UA,'UA':UA,'IOS_UA':IOS_UA,
'setItem':setItem,'getItem':getItem,'clearItem':clearItem,'stringify':stringify,'encodeUrl':encodeUrl,
'requireObj':requireObj,'md5':md5,'atob': atob, 'btoa':btoa,'base64ToImage': base64ToImage, 'encodeStr': encodeStr,
'decodeStr': decodeStr
}
  76. # print(getCryptoJS())
  77. class CMS:
  78. def __init__(self, rule, db=None, RuleClass=None, PlayParse=None,new_conf=None,ext=''):
  79. if new_conf is None:
  80. new_conf = {}
  81. self.lsg = storage_service()
  82. self.title = rule.get('title', '')
  83. self.id = rule.get('id', self.title)
  84. self.filter_url = rule.get('filter_url', '').replace('{{fl}}','{{fl|safe}}') # python jinjia2禁用自动编码
  85. cate_exclude = rule.get('cate_exclude','')
  86. tab_exclude = rule.get('tab_exclude','')
  87. self.lazy = rule.get('lazy', False)
  88. # self.play_disable = new_conf.get('PLAY_DISABLE',False)
  89. self.play_disable = self.lsg.getItem('PLAY_DISABLE',False)
  90. self.retry_count = new_conf.get('RETRY_CNT',3)
  91. # self.lazy_mode = new_conf.get('LAZYPARSE_MODE')
  92. self.lazy_mode = self.lsg.getItem('LAZYPARSE_MODE',2)
  93. self.ocr_api = new_conf.get('OCR_API')
  94. # self.cate_exclude = new_conf.get('CATE_EXCLUDE','')
  95. self.cate_exclude = self.lsg.getItem('CATE_EXCLUDE','')
  96. # self.tab_exclude = new_conf.get('TAB_EXCLUDE','')
  97. self.tab_exclude = self.lsg.getItem('TAB_EXCLUDE','')
  98. if cate_exclude:
  99. if not str(cate_exclude).startswith('|') and not str(self.cate_exclude).endswith('|'):
  100. self.cate_exclude = self.cate_exclude+'|'+cate_exclude
  101. else:
  102. self.cate_exclude += cate_exclude
  103. if tab_exclude:
  104. if not str(tab_exclude).startswith('|') and not str(self.tab_exclude).endswith('|'):
  105. self.tab_exclude = self.tab_exclude+'|'+tab_exclude
  106. else:
  107. self.tab_exclude += tab_exclude
  108. # print(self.cate_exclude)
  109. try:
  110. self.vod = redirect(url_for('vod')).headers['Location']
  111. except:
  112. self.vod = '/vod'
  113. # if not self.play_disable and self.lazy:
  114. if not self.play_disable:
  115. self.play_parse = rule.get('play_parse', False)
  116. try:
  117. play_url = getHost(self.lazy_mode)
  118. except:
  119. play_url = getHost(1,5705)
  120. # play_url = new_conf.get('PLAY_URL',getHost(2))
  121. if not play_url.startswith('http'):
  122. play_url = 'http://'+play_url
  123. # print(play_url)
  124. if self.play_parse:
  125. # self.play_url = play_url + self.vod + '?play_url='
  126. js0_password = self.lsg.getItem('JS0_PASSWORD')
  127. # print(f'js0密码:{js0_password}')
  128. js0_password = f'pwd={js0_password}&' if js0_password else ''
  129. self.play_url = f'{play_url}{self.vod}?{js0_password}rule={self.id}&ext={ext}&play_url='
  130. # logger.info(f'cms重定向链接:{self.play_url}')
  131. else:
  132. self.play_url = ''
  133. else:
  134. self.play_parse = False
  135. self.play_url = ''
  136. logger.info('播放免嗅地址: '+self.play_url)
  137. self.db = db
  138. self.RuleClass = RuleClass
  139. self.PlayParse = PlayParse
  140. host = rule.get('host','').rstrip('/')
  141. host = unquote(host)
  142. timeout = rule.get('timeout',5000)
  143. homeUrl = rule.get('homeUrl','/')
  144. url = rule.get('url','')
  145. detailUrl = rule.get('detailUrl','')
  146. searchUrl = rule.get('searchUrl','')
  147. default_headers = getHeaders(host)
  148. self_headers = rule.get('headers',{})
  149. default_headers.update(self_headers)
  150. headers = default_headers
  151. cookie = self.getCookie()
  152. # print(f'{self.title}cookie:{cookie}')
  153. self.oheaders = self_headers
  154. if cookie:
  155. headers['cookie'] = cookie
  156. self.oheaders['cookie'] = cookie
  157. limit = rule.get('limit',6)
  158. encoding = rule.get('编码', 'utf-8')
  159. search_encoding = rule.get('搜索编码', '')
  160. self.limit = min(limit,30)
  161. keys = headers.keys()
  162. for k in headers.keys():
  163. if str(k).lower() == 'user-agent':
  164. v = headers[k]
  165. if v == 'MOBILE_UA':
  166. headers[k] = MOBILE_UA
  167. elif v == 'PC_UA':
  168. headers[k] = PC_UA
  169. elif v == 'UC_UA':
  170. headers[k] = UC_UA
  171. elif v == 'IOS_UA':
  172. headers[k] = IOS_UA
  173. lower_keys = list(map(lambda x:x.lower(),keys))
  174. if not 'user-agent' in lower_keys:
  175. headers['User-Agent'] = UA
  176. if not 'referer' in lower_keys:
  177. headers['Referer'] = host
  178. self.headers = headers
  179. # print(headers)
  180. self.host = host
  181. self.homeUrl = urljoin(host,homeUrl) if host and homeUrl else homeUrl or host
  182. if url.find('[') >-1 and url.find(']') > -1:
  183. u1 = url.split('[')[0]
  184. u2 = url.split('[')[1].split(']')[0]
  185. self.url = urljoin(host,u1)+'['+urljoin(host,u2)+']' if host and url else url
  186. else:
  187. self.url = urljoin(host, url) if host and url else url
  188. self.detailUrl = urljoin(host,detailUrl) if host and detailUrl else detailUrl
  189. self.searchUrl = urljoin(host,searchUrl) if host and searchUrl else searchUrl
  190. self.class_name = rule.get('class_name','')
  191. self.class_url = rule.get('class_url','')
  192. self.class_parse = rule.get('class_parse','')
  193. self.filter_name = rule.get('filter_name', '')
  194. self.filter_url = rule.get('filter_url', '')
  195. self.filter_parse = rule.get('filter_parse', '')
  196. self.double = rule.get('double',False)
  197. self.一级 = rule.get('一级','')
  198. self.二级 = rule.get('二级','')
  199. self.二级访问前 = rule.get('二级访问前','')
  200. self.搜索 = rule.get('搜索','')
  201. self.推荐 = rule.get('推荐','')
  202. self.图片来源 = rule.get('图片来源','')
  203. self.encoding = encoding
  204. self.search_encoding = search_encoding
  205. self.timeout = round(int(timeout)/1000,2)
  206. self.filter = rule.get('filter',[])
  207. self.filter_def = rule.get('filter_def',{})
  208. self.play_json = rule['play_json'] if 'play_json' in rule else []
  209. self.pagecount = rule['pagecount'] if 'pagecount' in rule else {}
  210. self.extend = rule.get('extend',[])
  211. self.d = self.getObject()
  212. def getName(self):
  213. return self.title
  214. def getObject(self):
  215. o = edict({
  216. 'jsp':jsoup(self.url),
  217. 'getParse':self.getParse,
  218. 'saveParse':self.saveParse,
  219. 'oheaders':self.oheaders,
  220. 'headers':self.headers, # 通用免嗅需要
  221. 'encoding':self.encoding,
  222. 'name':self.title,
  223. 'timeout':self.timeout,
  224. })
  225. return o
  226. def regexp(self,prule,text,pos=None):
  227. ret = re.search(prule,text).groups()
  228. if pos != None and isinstance(pos,int):
  229. return ret[pos]
  230. else:
  231. return ret
  232. def test(self,text,string):
  233. searchObj = re.search(rf'{text}', string, re.M | re.I)
  234. # print(searchObj)
  235. # global vflag
  236. if searchObj:
  237. # vflag = searchObj.group()
  238. pass
  239. return searchObj
  240. def blank(self):
  241. result = {
  242. 'list': []
  243. }
  244. return result
  245. def blank_vod(self):
  246. return {
  247. "vod_id": "id",
  248. "vod_name": "片名",
  249. "vod_pic": "",# 图片
  250. "type_name": "剧情",
  251. "vod_year": "年份",
  252. "vod_area": "地区",
  253. "vod_remarks": "更新信息",
  254. "vod_actor": "主演",
  255. "vod_director": "导演",
  256. "vod_content": "简介"
  257. }
  258. def jsoup(self):
  259. jsp = jsoup(self.url)
  260. pdfh = jsp.pdfh
  261. pdfa = jsp.pdfa
  262. pd = jsp.pd
  263. pjfh = jsp.pjfh
  264. pjfa = jsp.pjfa
  265. pj = jsp.pj
  266. pq = jsp.pq
  267. return pdfh,pdfa,pd,pq
  268. def getClasses(self):
  269. if not self.db:
  270. msg = '未提供数据库连接'
  271. print(msg)
  272. return []
  273. name = self.getName()
  274. # self.db.metadata.clear()
  275. # RuleClass = rule_classes.init(self.db)
  276. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  277. # _logger.info('xxxxxx')
  278. if res:
  279. if not all([res.class_name,res.class_url]):
  280. return []
  281. cls = res.class_name.split('&')
  282. cls2 = res.class_url.split('&')
  283. classes = [{'type_name':cls[i],'type_id':cls2[i]} for i in range(len(cls))]
  284. # _logger.info(classes)
  285. logger.info(f"{self.getName()}使用缓存分类:{classes}")
  286. return classes
  287. else:
  288. return []
  289. def getCookie(self):
  290. name = self.getName()
  291. if not self.db:
  292. msg = f'{name}未提供数据库连接'
  293. print(msg)
  294. return False
  295. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  296. if res:
  297. return res.cookie or None
  298. else:
  299. return None
  300. def saveCookie(self,cookie):
  301. name = self.getName()
  302. if not self.db:
  303. msg = f'{name}未提供数据库连接'
  304. print(msg)
  305. return False
  306. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  307. if res:
  308. res.cookie = cookie
  309. self.db.session.add(res)
  310. else:
  311. res = self.RuleClass(name=name, cookie=cookie)
  312. self.db.session.add(res)
  313. try:
  314. self.db.session.commit()
  315. logger.info(f'{name}已保存cookie:{cookie}')
  316. except Exception as e:
  317. return f'保存cookie发生了错误:{e}'
  318. def saveClass(self, classes):
  319. if not self.db:
  320. msg = '未提供数据库连接'
  321. print(msg)
  322. return msg
  323. name = self.getName()
  324. class_name = '&'.join([cl['type_name'] for cl in classes])
  325. class_url = '&'.join([cl['type_id'] for cl in classes])
  326. # data = RuleClass.query.filter(RuleClass.name == '555影视').all()
  327. # self.db.metadata.clear()
  328. # RuleClass = rule_classes.init(self.db)
  329. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  330. # print(res)
  331. if res:
  332. res.class_name = class_name
  333. res.class_url = class_url
  334. self.db.session.add(res)
  335. msg = f'{self.getName()}修改成功:{res.id}'
  336. else:
  337. res = self.RuleClass(name=name, class_name=class_name, class_url=class_url)
  338. self.db.session.add(res)
  339. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  340. msg = f'{self.getName()}新增成功:{res.id}'
  341. try:
  342. self.db.session.commit()
  343. logger.info(msg)
  344. except Exception as e:
  345. return f'发生了错误:{e}'
  346. def getParse(self,play_url):
  347. if not self.db:
  348. msg = '未提供数据库连接'
  349. print(msg)
  350. return ''
  351. name = self.getName()
  352. # self.db.metadata.clear()
  353. # RuleClass = rule_classes.init(self.db)
  354. res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first()
  355. # _logger.info('xxxxxx')
  356. if res:
  357. real_url = res.real_url
  358. logger.info(f"{name}使用缓存播放地址:{real_url}")
  359. return real_url
  360. else:
  361. return ''
  362. def dealJson(self,html):
  363. try:
  364. # res = re.search('.*?\{(.*)\}',html,re.M|re.I).groups()[0]
  365. res = re.search('.*?\{(.*)\}',html,re.M|re.S).groups()[0]
  366. html = '{' + res + '}'
  367. return html
  368. except:
  369. return html
    def checkHtml(self, r):
        """Decode response *r* and transparently pass the BaoTa (宝塔) WAF check.

        When the page embeds a '?btwaf=' token, the same URL is re-requested
        with that token plus the cookies from the challenge response; if the
        retry comes back clean, the cookie is cached via saveCookie() so later
        sessions skip the challenge.

        :param r: a requests.Response-like object (needs .encoding/.text/.url/.cookies)
        :return: decoded page text (post-challenge when one was present)
        """
        r.encoding = self.encoding
        html = r.text
        if html.find('?btwaf=') > -1:
            # extract the one-shot waf token from the page
            btwaf = re.search('btwaf(.*?)"', html, re.M | re.I).groups()[0]
            url = r.url.split('#')[0] + '?btwaf' + btwaf
            cookies_dict = requests.utils.dict_from_cookiejar(r.cookies)
            cookie_str = ';'.join([f'{k}={cookies_dict[k]}' for k in cookies_dict])
            self.headers['cookie'] = cookie_str
            r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False)
            r.encoding = self.encoding
            html = r.text
            if html.find('?btwaf=') < 0:
                # challenge passed — persist the cookie for future sessions
                self.saveCookie(cookie_str)
        return html
  387. def saveParse(self, play_url,real_url):
  388. if not self.db:
  389. msg = '未提供数据库连接'
  390. print(msg)
  391. return msg
  392. name = self.getName()
  393. # data = RuleClass.query.filter(RuleClass.name == '555影视').all()
  394. # self.db.metadata.clear()
  395. # RuleClass = rule_classes.init(self.db)
  396. res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first()
  397. # print(res)
  398. if res:
  399. res.real_url = real_url
  400. self.db.session.add(res)
  401. msg = f'{name}服务端免嗅修改成功:{res.id}'
  402. else:
  403. res = self.PlayParse(play_url=play_url, real_url=real_url)
  404. self.db.session.add(res)
  405. res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first()
  406. msg = f'{name}服务端免嗅新增成功:{res.id}'
  407. try:
  408. self.db.session.commit()
  409. logger.info(msg)
  410. except Exception as e:
  411. return f'{name}发生了错误:{e}'
    def homeContent(self, fypage=1):
        """Build the home payload: category list (static, cached, or scraped)
        plus the recommended-video list.

        :param fypage: page number passed through to homeVideoContent (default 1)
        :return: dict with 'class', optional 'filters', and the video-list keys
        """
        # example target page: https://yanetflix.com/vodtype/dianying.html
        t1 = time()
        result = {}
        classes = []
        video_result = self.blank()
        # 1) static categories declared directly in the rule
        if self.class_url and self.class_name:
            class_names = self.class_name.split('&')
            class_urls = self.class_url.split('&')
            cnt = min(len(class_urls), len(class_names))
            for i in range(cnt):
                classes.append({
                    'type_name': class_names[i],
                    'type_id': class_urls[i]
                })
        print(self.headers)
        has_cache = False
        if self.homeUrl.startswith('http'):
            try:
                # 2) dynamic categories: try the DB cache first
                if self.class_parse:
                    t2 = time()
                    cache_classes = self.getClasses()
                    logger.info(f'{self.getName()}读取缓存耗时:{get_interval(t2)}毫秒')
                    if len(cache_classes) > 0:
                        classes = cache_classes
                        has_cache = True
                # cached classes and no recommend rule -> nothing to fetch
                if has_cache and not self.推荐:
                    pass
                else:
                    new_classes = []
                    r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout, verify=False)
                    html = self.checkHtml(r)
                    # 3) scrape categories when not cached;
                    # class_parse format: list_sel;title_sel;url_sel[;tag_regex]
                    if self.class_parse and not has_cache:
                        p = self.class_parse.split(';')
                        jsp = jsoup(self.url)
                        pdfh = jsp.pdfh
                        pdfa = jsp.pdfa
                        pd = jsp.pd
                        items = pdfa(html, p[0])
                        for item in items:
                            title = pdfh(item, p[1])
                            # drop categories whose name matches the exclude pattern
                            if self.cate_exclude and jsp.test(self.cate_exclude, title):
                                continue
                            url = pd(item, p[2])
                            tag = url
                            if len(p) > 3 and p[3].strip():
                                # optional regex extracts the type id out of the link
                                try:
                                    tag = self.regexp(p[3].strip(), url, 0)
                                except:
                                    logger.info(f'分类匹配错误:{title}对应的链接{url}无法匹配{p[3]}')
                                    continue
                            new_classes.append({
                                'type_name': title,
                                'type_id': tag
                            })
                        if len(new_classes) > 0:
                            classes.extend(new_classes)
                            self.saveClass(classes)
                    video_result = self.homeVideoContent(html, fypage)
            except Exception as e:
                logger.info(f'{self.getName()}主页发生错误:{e}')
        # final exclude pass over every collected category
        classes = list(filter(lambda x: not self.cate_exclude or not jsoup(self.url).test(self.cate_exclude, x['type_name']), classes))
        result['class'] = classes
        if self.filter:
            # a dict rule carries its own filters; anything else falls back to defaults
            if isinstance(self.filter, dict):
                result['filters'] = self.filter
            else:
                result['filters'] = playerConfig['filter']
        result.update(video_result)
        logger.info(f'{self.getName()}获取首页总耗时(包含读取缓存):{get_interval(t1)}毫秒')
        return result
    def homeVideoContent(self, html, fypage=1):
        """Parse the home page *html* into the recommend ('推荐') video list.

        Supports three rule styles: a 'js:' script evaluated in the embedded
        runtime, inheritance from the 一级 (category) rule via '*', or a
        ';'-separated selector list (optionally double-layered and/or
        json-flavoured).

        :param html: home page markup (or JSON text) already fetched by the caller
        :param fypage: page number echoed back in the 'no_use' metadata
        :return: dict with 'list' (videos) and 'no_use' (paging metadata)
        """
        p = self.推荐
        if not p:
            return self.blank()
        jsp = jsoup(self.homeUrl)
        result = {}
        videos = []
        is_js = isinstance(p, str) and str(p).strip().startswith('js:')  # js-scripted rule?
        if is_js:
            # NOTE(review): bare `headers` here looks like a module-level name
            # pulled in by `from utils.web import *`, not self.headers — confirm.
            headers['Referer'] = getHome(self.host)
            # expose the script-facing environment (TYPE tells the script which
            # endpoint it runs for)
            py_ctx.update({
                'input': self.homeUrl,
                'HOST': self.host,
                'TYPE': 'home',
                'oheaders': self.d.oheaders,
                'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
                'd': self.d,
                'getParse': self.d.getParse,
                'saveParse': self.d.saveParse,
                'jsp': jsp, 'jq': jsp, 'setDetail': setDetail,
            })
            ctx = py_ctx
            jscode = getPreJs() + p.strip().replace('js:', '', 1)
            try:
                loader, _ = runJScode(jscode, ctx=ctx)
                # the script must leave its result in a global named VODS
                vods = loader.eval('VODS')
                if isinstance(vods, JsObjectWrapper):
                    videos = vods.to_list()
            except Exception as e:
                logger.info(f'首页推荐执行js获取列表出错:{e}')
        else:
            if p == '*' and self.一级:
                # '*' inherits the category (一级) selectors, forcing single layer
                p = self.一级
                self.double = False
                logger.info(f'首页推荐继承一级: {p}')
            p = p.strip().split(';')  # split the selector list
            # double-layer rules need 6 fields, single-layer rules 5
            if not self.double and len(p) < 5:
                return self.blank()
            if self.double and len(p) < 6:
                return self.blank()
            jsp = jsoup(self.homeUrl)
            pp = self.一级.split(';')
            def getPP(p, pn, pp, ppn):
                # field pn of p, falling back to field ppn of the 一级 rule on '*'
                try:
                    ps = pp[ppn] if p[pn] == '*' and len(pp) > ppn else p[pn]
                    return ps
                except Exception as e:
                    return ''
            p0 = getPP(p, 0, pp, 0)
            is_json = str(p0).startswith('json:')
            if is_json:
                html = self.dealJson(html)
            # pick json- or html-flavoured selector helpers
            pdfh = jsp.pjfh if is_json else jsp.pdfh
            pdfa = jsp.pjfa if is_json else jsp.pdfa
            pd = jsp.pj if is_json else jsp.pd
            try:
                if self.double:
                    # outer selector yields groups; inner selector yields items
                    items = pdfa(html, p0.replace('json:', ''))
                    p1 = getPP(p, 1, pp, 0)
                    p2 = getPP(p, 2, pp, 1)
                    p3 = getPP(p, 3, pp, 2)
                    p4 = getPP(p, 4, pp, 3)
                    p5 = getPP(p, 5, pp, 4)
                    p6 = getPP(p, 6, pp, 5)
                    for item in items:
                        items2 = pdfa(item, p1)
                        for item2 in items2:
                            try:
                                title = pdfh(item2, p2)
                                try:
                                    img = pd(item2, p3)
                                except:
                                    img = ''
                                try:
                                    desc = pdfh(item2, p4)
                                except:
                                    desc = ''
                                # '+' joins multiple link selectors into one '$'-separated id
                                links = [pd(item2, _p5) if not self.detailUrl else pdfh(item2, _p5) for _p5 in p5.split('+')]
                                vid = '$'.join(links)
                                if len(p) > 6 and p[6]:
                                    content = pdfh(item2, p6)
                                else:
                                    content = ''
                                if self.二级 == '*':
                                    # '*' detail rule: pack title and pic into the id
                                    vid = vid + '@@' + title + '@@' + img
                                videos.append({
                                    "vod_id": vid,
                                    "vod_name": title,
                                    "vod_pic": img,
                                    "vod_remarks": desc,
                                    "no_use": {
                                        "vod_content": content,
                                        "type_id": 1,
                                        "type_name": "首页推荐",
                                    },
                                })
                            except:
                                pass
                else:
                    items = pdfa(html, p0.replace('json:', ''))
                    p1 = getPP(p, 1, pp, 1)
                    p2 = getPP(p, 2, pp, 2)
                    p3 = getPP(p, 3, pp, 3)
                    p4 = getPP(p, 4, pp, 4)
                    p5 = getPP(p, 5, pp, 5)
                    for item in items:
                        try:
                            title = pdfh(item, p1)
                            try:
                                img = pd(item, p2)
                            except:
                                img = ''
                            try:
                                desc = pdfh(item, p3)
                            except:
                                desc = ''
                            # '+' joins multiple link selectors into one '$'-separated id
                            links = [pd(item, _p5) if not self.detailUrl else pdfh(item, _p5) for _p5 in p4.split('+')]
                            vid = '$'.join(links)
                            if len(p) > 5 and p[5]:
                                content = pdfh(item, p5)
                            else:
                                content = ''
                            if self.二级 == '*':
                                vid = vid + '@@' + title + '@@' + img
                            videos.append({
                                "vod_id": vid,
                                "vod_name": title,
                                "vod_pic": img,
                                "vod_remarks": desc,
                                "no_use": {
                                    "vod_content": content,
                                    "type_id": 1,
                                    "type_name": "首页推荐",
                                },
                            })
                        except:
                            pass
            except Exception as e:
                logger.info(f'首页内容获取失败:{e}')
                return self.blank()
        # optional suffix appended to every absolute poster URL
        # (referer/proxy workaround configured via 图片来源)
        if self.图片来源:
            for video in videos:
                if video.get('vod_pic', '') and str(video['vod_pic']).startswith('http'):
                    video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}"
        result['list'] = videos
        result['no_use'] = {
            'code': 1,
            'msg': '数据列表',
            'page': fypage,
            'pagecount': math.ceil(len(videos) / self.limit),
            'limit': self.limit,
            'total': len(videos),
            'now_count': len(result['list']),
        }
        return result
  667. def categoryContent(self, fyclass, fypage, fl=None):
  668. """
  669. 一级带分类的数据返回
  670. :param fyclass: 分类标识
  671. :param fypage: 页码
  672. :param fl: 筛选
  673. :return: cms一级数据
  674. """
  675. if fl is None:
  676. fl = {}
  677. # print(f'fl:{fl}')
  678. if self.filter_def and isinstance(self.filter_def,dict):
  679. try:
  680. if self.filter_def.get(fyclass) and isinstance(self.filter_def[fyclass],dict):
  681. self_filter_def = self.filter_def[fyclass]
  682. filter_def = ujson.loads(ujson.dumps(self_filter_def))
  683. filter_def.update(fl)
  684. fl = filter_def
  685. except Exception as e:
  686. print(f'合并不同分类对应的默认筛选出错:{e}')
  687. # print(fl)
  688. result = {}
  689. # urlParams = ["", "", "", "", "", "", "", "", "", "", "", ""]
  690. # urlParams = [""] * 12
  691. # urlParams[0] = tid
  692. # urlParams[8] = str(pg)
  693. # for key in self.extend:
  694. # urlParams[int(key)] = self.extend[key]
  695. # params = '-'.join(urlParams)
  696. # print(params)
  697. # url = self.url + '/{0}.html'.format
  698. t1 = time()
  699. pg = str(fypage)
  700. url = self.url.replace('fyclass',fyclass)
  701. if fypage == 1 and self.test('[\[\]]',url):
  702. url = url.split('[')[1].split(']')[0]
  703. elif fypage > 1 and self.test('[\[\]]',url):
  704. url = url.split('[')[0]
  705. if self.filter_url:
  706. if not 'fyfilter' in url: # 第一种情况,默认不写fyfilter关键字,视为直接拼接在链接后面当参数
  707. if not url.endswith('&') and not self.filter_url.startswith('&'):
  708. url += '&'
  709. url += self.filter_url
  710. else: # 第二种情况直接替换关键字为待拼接的结果后面渲染,适用于 ----fypage.html的情况
  711. url = url.replace('fyfilter', self.filter_url)
  712. # print(f'url渲染:{url}')
  713. url = render_template_string(url,fl=fl)
  714. # fl_url = render_template_string(self.filter_url,fl=fl)
  715. # if not 'fyfilter' in url: # 第一种情况,默认不写fyfilter关键字,视为直接拼接在链接后面当参数
  716. # if not url.endswith('&') and not fl_url.startswith('&'):
  717. # url += '&'
  718. # url += fl_url
  719. # else: # 第二种情况直接替换关键字为渲染后的结果,适用于 ----fypage.html的情况
  720. # url = url.replace('fyfilter',fl_url)
  721. if url.find('fypage') > -1:
  722. if '(' in url and ')' in url:
  723. # url_rep = url[url.find('('):url.find(')')+1]
  724. # cnt_page = url.split('(')[1].split(')')[0].replace('fypage',pg)
  725. # print(url_rep)
  726. url_rep = re.search('.*?\((.*)\)',url,re.M|re.S).groups()[0]
  727. cnt_page = url_rep.replace('fypage', pg)
  728. # print(url_rep)
  729. # print(cnt_page)
  730. cnt_ctx = {}
  731. exec(f'cnt_pg={cnt_page}', cnt_ctx)
  732. cnt_pg = str(cnt_ctx['cnt_pg']) # 计算表达式的结果
  733. url = url.replace(url_rep,str(cnt_pg)).replace('(','').replace(')','')
  734. # print(url)
  735. else:
  736. url = url.replace('fypage',pg)
  737. # print(url)
  738. logger.info(url)
  739. p = self.一级
  740. jsp = jsoup(self.url)
  741. videos = []
  742. is_js = isinstance(p, str) and str(p).startswith('js:') # 是js
  743. if is_js:
  744. headers['Referer'] = getHome(url)
  745. py_ctx.update({
  746. 'input': url,
  747. 'TYPE': 'cate', # 海阔js环境标志
  748. 'oheaders': self.d.oheaders,
  749. 'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
  750. 'd': self.d,
  751. 'MY_CATE':fyclass, # 分类id
  752. 'MY_FL':fl, # 筛选
  753. 'MY_PAGE':fypage, # 页数
  754. 'detailUrl':self.detailUrl or '', # 详情页链接
  755. 'getParse': self.d.getParse,
  756. 'saveParse': self.d.saveParse,
  757. 'jsp': jsp,'jq':jsp, 'setDetail': setDetail,
  758. })
  759. ctx = py_ctx
  760. # print(ctx)
  761. jscode = getPreJs() + p.replace('js:', '', 1)
  762. # print(jscode)
  763. loader, _ = runJScode(jscode, ctx=ctx)
  764. # print(loader.toString())
  765. vods = loader.eval('VODS')
  766. # print('vods:',vods)
  767. if isinstance(vods, JsObjectWrapper):
  768. videos = vods.to_list()
  769. else:
  770. p = p.split(';') # 解析
  771. # print(len(p))
  772. # print(p)
  773. if len(p) < 5:
  774. return self.blank()
  775. is_json = str(p[0]).startswith('json:')
  776. pdfh = jsp.pjfh if is_json else jsp.pdfh
  777. pdfa = jsp.pjfa if is_json else jsp.pdfa
  778. pd = jsp.pj if is_json else jsp.pd
  779. # print(pdfh(r.text,'body a.module-poster-item.module-item:eq(1)&&Text'))
  780. # print(pdfh(r.text,'body a.module-poster-item.module-item:eq(0)'))
  781. # print(pdfh(r.text,'body a.module-poster-item.module-item:first'))
  782. items = []
  783. try:
  784. r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False)
  785. html = self.checkHtml(r)
  786. print(self.headers)
  787. # print(html)
  788. if is_json:
  789. html = self.dealJson(html)
  790. html = json.loads(html)
  791. # else:
  792. # soup = bs4.BeautifulSoup(html, 'lxml')
  793. # html = soup.prettify()
  794. # print(html)
  795. # with open('1.html',mode='w+',encoding='utf-8') as f:
  796. # f.write(html)
  797. items = pdfa(html,p[0].replace('json:','',1))
  798. except:
  799. pass
  800. # print(items)
  801. for item in items:
  802. # print(item)
  803. try:
  804. title = pdfh(item, p[1])
  805. img = pd(item, p[2])
  806. desc = pdfh(item, p[3])
  807. links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')]
  808. link = '$'.join(links)
  809. content = '' if len(p) < 6 else pdfh(item, p[5])
  810. # sid = self.regStr(sid, "/video/(\\S+).html")
  811. vod_id = f'{fyclass}${link}' if self.detailUrl else link # 分类,播放链接
  812. if self.二级 == '*':
  813. vod_id = vod_id+'@@'+title+'@@'+img
  814. videos.append({
  815. "vod_id": vod_id,
  816. "vod_name": title,
  817. "vod_pic": img,
  818. "vod_remarks": desc,
  819. "vod_content": content,
  820. })
  821. except Exception as e:
  822. print(f'发生了错误:{e}')
  823. pass
  824. if self.图片来源:
  825. for video in videos:
  826. if video.get('vod_pic','') and str(video['vod_pic']).startswith('http'):
  827. video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}"
  828. print('videos:',videos)
  829. limit = 40
  830. cnt = 9999 if len(videos) > 0 else 0
  831. pagecount = 0
  832. if self.pagecount and isinstance(self.pagecount,dict) and fyclass in self.pagecount:
  833. print(f'fyclass:{fyclass},self.pagecount:{self.pagecount}')
  834. pagecount = int(self.pagecount[fyclass])
  835. result['list'] = videos
  836. result['page'] = fypage
  837. result['pagecount'] = pagecount or max(cnt,fypage)
  838. result['limit'] = limit
  839. result['total'] = cnt
  840. # print(result)
  841. # print(result['pagecount'])
  842. logger.info(f'{self.getName()}获取分类{fyclass}第{fypage}页耗时:{get_interval(t1)}毫秒,共计{round(len(str(result)) / 1000, 2)} kb')
  843. nodata = {
  844. 'list': [{'vod_name': '无数据,防无限请求', 'vod_id': 'no_data', 'vod_remarks': '不要点,会崩的',
  845. 'vod_pic': 'https://ghproxy.com/https://raw.githubusercontent.com/hjdhnx/dr_py/main/404.jpg'}],
  846. 'total': 1, 'pagecount': 1, 'page': 1, 'limit': 1
  847. }
  848. # return result
  849. return result if len(result['list']) > 0 else nodata
    def 二级渲染(self,parse_str:'str|dict',**kwargs):
        """
        Render the detail ("二级") page data for one vod.

        :param parse_str: detail-page parse rule; '*' (no detail page, the list
                          link is sniffed/played directly), a dict of field
                          selectors (title/desc/content/img/tabs/lists/...), or
                          anything else (ignored).
        :param kwargs: detailUrl, orId, url, vod, html, show_name, jsp, fyclass —
                       see the inline comments below.
        :return: the populated vod dict.
        """
        # **kwargs is the variable keyword-argument dict.
        p = parse_str  # detail parse rule (str rule or js-obj/json dict)
        detailUrl = kwargs.get('detailUrl','')  # processed vod_id of the detail page
        orId = kwargs.get('orId','')  # original vod_id of the detail page
        url = kwargs.get('url','')  # smart-joined detail-page link
        vod = kwargs.get('vod',self.blank_vod())  # result dict to fill; defaults to a blank vod
        html = kwargs.get('html','')  # page source (fetched below when not supplied)
        show_name = kwargs.get('show_name','')  # whether to tag the source name (drpy distinction)
        jsp = kwargs.get('jsp','')  # jsoup-style parser the caller bound (jsp = jsoup(self.url))
        fyclass = kwargs.get('fyclass','')  # category name the user navigated from
        play_url = self.play_url
        vod_name = '片名'
        vod_pic = ''
        if self.二级 == '*':
            # list pages pack vod_id as "<id>@@<name>@@<pic>" when 二级 == '*'
            extra = orId.split('@@')
            vod_name = extra[1] if len(extra) > 1 else vod_name
            vod_pic = extra[2] if len(extra) > 2 else vod_pic
        if self.play_json:
            play_url = play_url.replace('&play_url=', '&type=json&play_url=')
        if p == '*':  # rule '*': one level only, play (sniff) the list link directly
            vod['vod_play_from'] = '道长在线'
            vod['vod_remarks'] = detailUrl
            vod['vod_actor'] = '没有二级,只有一级链接直接嗅探播放'
            vod['vod_content'] = url
            vod['vod_id'] = orId
            vod['vod_name'] = vod_name
            vod['vod_pic'] = vod_pic
            vod['vod_play_url'] = '嗅探播放$' + play_url + url.split('@@')[0]
        elif not p or (not isinstance(p, dict) and not isinstance(p, str)) or (isinstance(p, str) and not str(p).startswith('js:')):
            # unusable rule: empty, wrong type, or a plain string that is not js
            pass
        else:
            is_json = p.get('is_json', False) if isinstance(p, dict) else False  # optional is_json flag inside the rule
            pdfh = jsp.pjfh if is_json else jsp.pdfh
            pdfa = jsp.pjfa if is_json else jsp.pdfa
            pd = jsp.pj if is_json else jsp.pd
            pq = jsp.pq
            vod['vod_id'] = orId
            if not html:  # no html passed in: fetch detailUrl ourselves
                r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False)
                html = self.checkHtml(r)
                if is_json:
                    html = self.dealJson(html)
                    html = json.loads(html)
            tt1 = time()
            if p.get('title'):
                # "title;type_name" — second selector is optional
                p1 = p['title'].split(';')
                vod['vod_name'] = pdfh(html, p1[0]).replace('\n', ' ').strip()
                vod['type_name'] = pdfh(html, p1[1]).replace('\n',' ').strip() if len(p1)>1 else ''
            if p.get('desc'):
                # "remarks;year;area;actor;director" — trailing selectors optional
                try:
                    p1 = p['desc'].split(';')
                    vod['vod_remarks'] = pdfh(html, p1[0]).replace('\n', '').strip()
                    vod['vod_year'] = pdfh(html, p1[1]).replace('\n', ' ').strip() if len(p1) > 1 else ''
                    vod['vod_area'] = pdfh(html, p1[2]).replace('\n', ' ').strip() if len(p1) > 2 else ''
                    vod['vod_actor'] = pdfh(html, p1[3]).replace('\n', ' ').strip() if len(p1) > 3 else ''
                    vod['vod_director'] = pdfh(html, p1[4]).replace('\n', ' ').strip() if len(p1) > 4 else ''
                except:
                    pass
            if p.get('content'):
                p1 = p['content'].split(';')
                try:
                    content = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1])
                    vod['vod_content'] = content
                except:
                    pass
            if p.get('img'):
                p1 = p['img']
                try:
                    img = pd(html, p1)
                    vod['vod_pic'] = img
                except Exception as e:
                    logger.info(f'二级图片定位失败,但不影响使用{e}')
            vod_play_from = '$$$'
            playFrom = []
            init_flag = {'ctx':False}  # mutable flag so js_pre() initializes the ctx at most once
            def js_pre():
                # Lazily build the shared JS context used by 重定向/tabs/lists scripts.
                headers['Referer'] = getHome(url)
                py_ctx.update({
                    'input': url,
                    'html': html,
                    'TYPE': 'detail',  # hiker-style js environment marker
                    'MY_CATE': fyclass,  # category id
                    'oheaders': self.d.oheaders,
                    'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
                    'd': self.d,
                    'getParse': self.d.getParse,
                    'saveParse': self.d.saveParse,
                    'jsp': jsp,'jq':jsp, 'setDetail': setDetail,'play_url':play_url
                })
                init_flag['ctx'] = True
            if p.get('重定向') and str(p['重定向']).startswith('js:'):
                # optional js redirect hook: the script may replace `html`
                if not init_flag['ctx']:
                    js_pre()
                ctx = py_ctx
                rcode = p['重定向'].replace('js:', '', 1)
                jscode = getPreJs() + rcode
                loader, _ = runJScode(jscode, ctx=ctx)
                logger.info(f'开始执行二级重定向代码:{rcode}')
                html = loader.eval('html')
                if isinstance(vod, JsObjectWrapper):  # NOTE(review): likely meant isinstance(html, ...) — confirm
                    html = str(html)
            if p.get('tabs'):
                vodHeader = []
                if str(p['tabs']).startswith('js:'):
                    # tabs produced by a js snippet that sets TABS
                    if not init_flag['ctx']:
                        js_pre()
                    ctx = py_ctx
                    rcode = p['tabs'].replace('js:', '', 1)
                    jscode = getPreJs() + rcode
                    loader, _ = runJScode(jscode, ctx=ctx)
                    logger.info(f'开始执行tabs代码:{rcode}')
                    vHeader = loader.eval('TABS')
                    if isinstance(vod, JsObjectWrapper):  # NOTE(review): likely meant isinstance(vHeader, ...) — confirm
                        vHeader = vHeader.to_list()
                    vodHeader = vHeader
                else:
                    # tabs from a selector: "tab_selector[;per-tab ext selector]"
                    tab_parse = p['tabs'].split(';')[0]
                    vHeader = pdfa(html, tab_parse)
                    print(f'二级线路定位列表数:{len((vHeader))}')
                    from lxml.html import tostring as html2str
                    tab_text = p.get('tab_text','') or 'body&&Text'
                    if not is_json:
                        for v in vHeader:
                            # extract and filter the tab (line) titles
                            v_title = pdfh(v,tab_text).strip()
                            if self.tab_exclude and jsp.test(self.tab_exclude, v_title):
                                continue
                            vodHeader.append(v_title)
                    else:
                        vodHeader = vHeader
                    print(f'过滤后真实线路列表数:{len((vodHeader))} {vodHeader}')
            else:
                vodHeader = ['道长在线']
            new_map = {}
            for v in vodHeader:
                # de-duplicate tab names: repeats get a numeric suffix
                if not v in new_map:
                    new_map[v] = 1
                else:
                    new_map[v] += 1
                if new_map[v] > 1:
                    v = f'{v}{new_map[v]-1}'
                playFrom.append(v)
            vod_play_from = vod_play_from.join(playFrom)
            vod_play_url = '$$$'
            vod_tab_list = []
            if p.get('lists'):
                if str(p['lists']).startswith('js:'):
                    # episode lists produced by a js snippet that sets LISTS
                    if not init_flag['ctx']:
                        js_pre()
                    ctx = py_ctx
                    ctx['TABS'] = vodHeader  # pass the tab list to the script
                    rcode = p['lists'].replace('js:', '', 1)
                    jscode = getPreJs() + rcode
                    loader, _ = runJScode(jscode, ctx=ctx)
                    logger.info(f'开始执行lists代码:{rcode}')
                    vlists = loader.eval('LISTS')
                    if isinstance(vod, JsObjectWrapper):  # NOTE(review): likely meant isinstance(vlists, ...) — confirm
                        vlists = vlists.to_list()  # e.g. [['第1集$http://1.mp4','第2集$http://2.mp4'],['第3集$http://1.mp4','第4集$http://2.mp4']]
                        for i in range(len(vlists)):
                            try:
                                # keep only "name$url" (first two '$' fields) per episode
                                vlists[i] = list(map(lambda x:'$'.join(x.split('$')[:2]),vlists[i]))
                            except Exception as e:
                                logger.info(f'LISTS格式化发生错误:{e}')
                    vod_play_url = vod_play_url.join(list(map(lambda x:'#'.join(x),vlists)))
                else:
                    list_text = p.get('list_text','') or 'body&&Text'
                    list_url = p.get('list_url','') or 'a&&href'
                    print('list_text:' + list_text)
                    print('list_url:' + list_url)
                    is_tab_js = p['tabs'].strip().startswith('js:')
                    for i in range(len(vodHeader)):
                        tab_name = str(vodHeader[i])
                        # optional per-tab ext selector; '#idv'/'#id' expand to tab name/index
                        tab_ext = p['tabs'].split(';')[1] if len(p['tabs'].split(';')) > 1 and not is_tab_js else ''
                        p1 = p['lists'].replace('#idv', tab_name).replace('#id', str(i))
                        tab_ext = tab_ext.replace('#idv', tab_name).replace('#id', str(i))
                        vodList = pdfa(html, p1)  # episode list of one tab (line)
                        if self.play_parse:  # auto base64-encode the play link
                            vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + play_url + encodeUrl(i) for i
                                       in vodList] if is_json else \
                                [pdfh(i,list_text) + '$' + play_url + encodeUrl(pd(i, list_url)) for i in vodList]  # join as name$link
                        else:
                            vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + play_url + i for i in
                                       vodList] if is_json else \
                                [pdfh(i,list_text) + '$' + play_url + pd(i, list_url) for i in vodList]  # join as name$link
                        vodList = forceOrder(vodList,option=lambda x:x.split('$')[0])
                        vlist = '#'.join(vodList)  # join the episodes of one tab
                        vod_tab_list.append(vlist)
                    vod_play_url = vod_play_url.join(vod_tab_list)
            vod_play_url_str = vod_play_url[:min(len(vod_play_url),500)]
            print(vod_play_url_str)
            vod['vod_play_from'] = vod_play_from
            vod['vod_play_url'] = vod_play_url
            logger.info(f'{self.getName()}仅二级渲染{len(vod_play_url.split("$$$")[0].split("$"))}集耗时:{get_interval(tt1)}毫秒,共计{round(len(str(vod)) / 1000, 2)} kb')
        if show_name:
            vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}'
        return vod
    def detailOneVod(self,id,fyclass='',show_name=False):
        """
        Fetch and render the detail page for a single vod id.

        :param id: vod_id from the list page; may be packed as "<id>@@<name>@@<pic>".
        :param fyclass: category the id came from ('' when unknown).
        :param show_name: prefix vod_content with the rule id when truthy.
        :return: vod dict (a blank vod on failure).
        """
        vod = self.blank_vod()
        orId = str(id)
        orUrl = orId
        if fyclass:
            orUrl = f'{fyclass}${orId}'
        detailUrl = orId.split('@@')[0]
        # Build the real detail url: plain id -> fill the detailUrl template,
        # relative path -> join with homeUrl, absolute url -> use as-is.
        if not detailUrl.startswith('http') and not '/' in detailUrl:
            url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass',fyclass)
        elif '/' in detailUrl:
            url = urljoin(self.homeUrl,detailUrl)
        else:
            url = detailUrl
        if self.二级访问前:  # optional js hook executed before visiting the detail page
            logger.info(f'尝试在二级访问前执行代码: {self.二级访问前}')
            py_ctx.update({
                'MY_URL': url,
                'oheaders': self.d.oheaders,
                'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
                'd': self.d,
            })
            ctx = py_ctx
            jscode = getPreJs() + self.二级访问前.replace('js:', '', 1)
            loader, _ = runJScode(jscode, ctx=ctx)
            try:
                # the hook may rewrite MY_URL; adopt it when non-empty
                MY_URL = loader.eval('MY_URL')
                if isinstance(MY_URL, JsObjectWrapper):
                    MY_URL = str(MY_URL)
                if MY_URL:
                    url = MY_URL
            except Exception as e:
                logger.info(f'执行二级访问前发生错误: {e}')
        logger.info(f'进入详情页: {url}')
        try:
            p = self.二级  # detail parse rule
            jsp = jsoup(url) if url.startswith('http') else jsoup(self.url)
            is_js = isinstance(p,str) and str(p).startswith('js:')  # js rule?
            if is_js:
                headers['Referer'] = getHome(url)
                play_url = self.play_url
                if self.play_json:
                    play_url = play_url.replace('&play_url=', '&type=json&play_url=')
                py_ctx.update({
                    'input': url,
                    'TYPE': 'detail',  # hiker-style js environment marker
                    '二级': self.二级渲染,  # expose the dict-rule renderer to the script
                    'MY_CATE': fyclass,  # category id
                    'oheaders': self.d.oheaders,
                    'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
                    'd': self.d,
                    'getParse': self.d.getParse,
                    'saveParse': self.d.saveParse,
                    'jsp':jsp,'jq':jsp,'setDetail':setDetail,'play_url':play_url
                })
                ctx = py_ctx
                jscode = getPreJs() + p.replace('js:','',1)
                loader, _ = runJScode(jscode, ctx=ctx)
                vod = loader.eval('VOD')
                if isinstance(vod,JsObjectWrapper):
                    vod = vod.to_dict()
                    if show_name:
                        vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}'
                else:
                    # the script produced nothing usable
                    vod = self.blank_vod()
            else:
                vod = self.二级渲染(p,detailUrl=detailUrl,orId=orUrl,url=url,vod=vod,show_name=show_name,jsp=jsp,fyclass=fyclass)
        except Exception as e:
            logger.info(f'{self.getName()}获取单个详情页{detailUrl}出错{e}')
        if self.图片来源:  # optional suffix appended to absolute image urls
            if vod.get('vod_pic','') and str(vod['vod_pic']).startswith('http'):
                vod['vod_pic'] = f"{vod['vod_pic']}{self.图片来源}"
        # make sure vod_id round-trips the original (possibly class-prefixed) id
        if not vod.get('vod_id') or ('$' in orUrl and vod['vod_id']!=orUrl):
            vod['vod_id'] = orUrl
        return vod
  1160. def detailContent(self, fypage, array,show_name=False):
  1161. """
  1162. cms二级数据
  1163. :param array:
  1164. :return:
  1165. """
  1166. # print('进入二级')
  1167. t1 = time()
  1168. array = array if len(array) <= self.limit else array[(fypage-1)*self.limit:min(self.limit*fypage,len(array))]
  1169. thread_pool = ThreadPoolExecutor(min(self.limit,len(array))) # 定义线程池来启动多线程执行此任务
  1170. obj_list = []
  1171. try:
  1172. for vod_url in array:
  1173. print(vod_url)
  1174. vod_class = ''
  1175. if vod_url.find('$') > -1:
  1176. tmp = vod_url.split('$')
  1177. vod_class = tmp[0]
  1178. vod_url = tmp[1]
  1179. obj = thread_pool.submit(self.detailOneVod, vod_url,vod_class,show_name)
  1180. obj_list.append(obj)
  1181. thread_pool.shutdown(wait=True) # 等待所有子线程并行完毕
  1182. vod_list = [obj.result() for obj in obj_list]
  1183. result = {
  1184. 'list': vod_list
  1185. }
  1186. logger.info(f'{self.getName()}获取详情页耗时:{get_interval(t1)}毫秒,共计{round(len(str(result)) / 1000, 2)} kb')
  1187. except Exception as e:
  1188. result = {
  1189. 'list': []
  1190. }
  1191. logger.info(f'{self.getName()}获取详情页耗时:{get_interval(t1)}毫秒,发生错误:{e}')
  1192. # print(result)
  1193. return result
    def searchContent(self, key, fypage=1,show_name=False):
        """
        cms search: query the source for ``key`` and return matching vods.

        :param key: search keyword (re-encoded per search_encoding/encoding).
        :param fypage: 1-based page number.
        :param show_name: when truthy, tag every result with the rule id.
        :return: dict ``{'list': [...]}``.
        """
        if self.search_encoding:
            if str(self.search_encoding).lower() != 'utf-8':
                key = encodeStr(key,self.search_encoding)
        elif self.encoding and str(self.encoding).startswith('gb'):
            # gb* sites need the keyword re-encoded before url substitution
            key = encodeStr(key,self.encoding)
        pg = str(fypage)
        if not self.searchUrl:
            return self.blank()
        url = self.searchUrl.replace('**', key).replace('fypage',pg)
        logger.info(f'{self.getName()}搜索链接:{url}')
        if not self.搜索:
            return self.blank()
        # '*' means: reuse the list (一级) rule for search results.
        p = self.一级 if self.搜索 == '*' and self.一级 else self.搜索
        pp = self.一级.split(';')
        jsp = jsoup(url) if url.startswith('http') else jsoup(self.url)
        videos = []
        is_js = isinstance(p, str) and str(p).startswith('js:')  # js rule?
        def getPP(p, pn, pp, ppn):
            # Rule-field helper: '*' in the search rule falls back to the list rule field.
            try:
                ps = pp[ppn] if p[pn] == '*' and len(pp) > ppn else p[pn]
                return ps
            except:
                return ''
        if is_js:
            headers['Referer'] = getHome(url)
            py_ctx.update({
                'input': url,
                'oheaders': self.d.oheaders,
                'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
                'd': self.d,
                'MY_PAGE': fypage,
                'KEY': key,  # search keyword
                'TYPE': 'search',  # hiker-style js environment marker
                'detailUrl': self.detailUrl or '',
                # detail page link template
                'getParse': self.d.getParse,
                'saveParse': self.d.saveParse,
                'jsp': jsp,'jq':jsp, 'setDetail': setDetail,
            })
            ctx = py_ctx
            jscode = getPreJs() + p.replace('js:', '', 1)
            loader, _ = runJScode(jscode, ctx=ctx)
            vods = loader.eval('VODS')
            if isinstance(vods, JsObjectWrapper):
                videos = vods.to_list()
        else:
            p = p.split(';')
            if len(p) < 5:
                return self.blank()
            is_json = str(p[0]).startswith('json:')
            pdfh = jsp.pjfh if is_json else jsp.pdfh
            pdfa = jsp.pjfa if is_json else jsp.pdfa
            pd = jsp.pj if is_json else jsp.pd
            pq = jsp.pq
            try:
                # Optional ";post"/";postjson" suffix on the url selects the request method.
                req_method = url.split(';')[1].lower() if len(url.split(';'))>1 else 'get'
                if req_method == 'post':
                    # "url#k1=v1&k2=v2" -> form-encoded POST body
                    rurls = url.split(';')[0].split('#')
                    rurl = rurls[0]
                    params = rurls[1] if len(rurls)>1 else ''
                    print(f'rurl:{rurl},params:{params}')
                    new_dict = {}
                    new_tmp = params.split('&')
                    for i in new_tmp:
                        new_dict[i.split('=')[0]] = i.split('=')[1]
                    data = new_dict
                    logger.info(self.headers)
                    r = requests.post(rurl, headers=self.headers,data=data, timeout=self.timeout, verify=False)
                elif req_method == 'postjson':
                    # "url#{...}" -> raw JSON POST body
                    rurls = url.split(';')[0].split('#')
                    rurl = rurls[0]
                    params = rurls[1] if len(rurls) > 1 else '{}'
                    headers_cp = self.headers.copy()
                    headers_cp.update({'Content-Type':'application/json'})
                    try:
                        params = ujson.dumps(ujson.loads(params))
                    except:
                        params = '{}'
                    logger.info(headers_cp)
                    logger.info(params)
                    r = requests.post(rurl, headers=headers_cp, data=params, timeout=self.timeout, verify=False)
                else:
                    r = requests.get(url, headers=self.headers,timeout=self.timeout,verify=False)
                html = self.checkHtml(r)
                if is_json:
                    html = self.dealJson(html)
                    html = json.loads(html)
                # Captcha wall detected: try OCR, persist the cookie, refetch.
                if not is_json and re.search('系统安全验证|输入验证码',html,re.M|re.S):
                    cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api)
                    if not cookie:
                        return {
                            'list': videos
                        }
                    self.saveCookie(cookie)
                    self.headers['cookie'] = cookie
                    r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False)
                    r.encoding = self.encoding
                    html = r.text
                if not show_name and not str(html).find(key) > -1:
                    logger.info('搜索结果源码未包含关键字,疑似搜索失败,正为您打印结果源码')
                    print(html)
                p0 = getPP(p,0,pp,0)
                items = pdfa(html,p0.replace('json:','',1))
                videos = []
                p1 = getPP(p, 1, pp, 1)
                p2 = getPP(p, 2, pp, 2)
                p3 = getPP(p, 3, pp, 3)
                p4 = getPP(p, 4, pp, 4)
                p5 = getPP(p, 5, pp, 5)
                for item in items:
                    try:
                        # '||' joins the text of multiple title selectors
                        title = ''.join([pdfh(item, i) for i in p1.split('||')])
                        try:
                            img = pd(item, p2)
                        except:
                            img = ''
                        try:
                            desc = pdfh(item, p3)
                        except:
                            desc = ''
                        if len(p) > 5 and p[5]:
                            content = pdfh(item, p5)
                        else:
                            content = ''
                        # '+' joins multiple link selectors with '$'
                        links = [pd(item, _p4) if not self.detailUrl else pdfh(item, _p4) for _p4 in p4.split('+')]
                        link = '$'.join(links)
                        vod_id = link
                        if self.二级 == '*':
                            vod_id = vod_id + '@@' + title + '@@' + img
                        videos.append({
                            "vod_id": vod_id,
                            "vod_name": title,
                            "vod_pic": img,
                            "vod_remarks": desc,
                            "vod_content": content,  # unused field
                        })
                    except Exception as e:
                        print(f'搜索列表解析发生错误:{e}')
                        pass
            except Exception as e:
                logger.info(f'搜索{self.getName()}发生错误:{e}')
        if self.图片来源:  # optional suffix appended to absolute image urls
            for video in videos:
                if video.get('vod_pic','') and str(video['vod_pic']).startswith('http'):
                    video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}"
        if show_name and len(videos) > 0:
            # tag results with the rule id so aggregated search can route them back
            for video in videos:
                video['vod_name'] = self.id + ' '+video['vod_name']
                video['vod_rule'] = self.id
                video['vod_id'] = video['vod_id'] +'#' + self.id
        result = {
            'list': videos
        }
        return result
    def playContent(self, play_url,jxs=None,flag=None):
        """
        Resolve a playable url for the player (lazy/"免嗅" step + play_json).

        :param play_url: raw play link; may be base64-encoded and/or url-quoted.
        :param jxs: list of available parsers exposed to the lazy js context.
        :param flag: only present for type-4 sources; unused here.
        :return: final play url (str) or a play-json dict.
        """
        # NOTE: videos matched by the global flags never reach the lazy code —
        # the player calls the parse endpoint (yoursite:5705/vod?play_url=...) directly.
        if not jxs:
            jxs = []
        if play_url.find('http') == -1:  # string looks encoded: try automatic base64 decode
            try:
                play_url = base64Decode(play_url)
            except:
                pass
        play_url = unquote(play_url)
        origin_play_url = play_url  # kept as a fallback when lazy yields nothing
        print(origin_play_url)
        if self.lazy:
            print(f'{play_url}->开始执行免嗅代码{type(self.lazy)}->{self.lazy}')
            t1 = time()
            try:
                if type(self.lazy) == JsObjectWrapper:
                    # non-text lazy cannot be executed here
                    logger.info(f'lazy非纯文本免嗅失败耗时:{get_interval(t1)}毫秒,播放地址:{play_url}')
                elif str(self.lazy).startswith('py:'):
                    # python lazy: run lazyParse(play_url, d) from the loaded code
                    pycode = runPy(self.lazy)
                    if pycode:
                        pos = pycode.find('def lazyParse')
                        if pos < 0:
                            return play_url
                        pyenv = safePython(self.lazy,pycode[pos:])
                        lazy_url = pyenv.action_task_exec('lazyParse',[play_url,self.d])
                        logger.info(f'py免嗅耗时:{get_interval(t1)}毫秒,播放地址:{lazy_url}')
                        if isinstance(lazy_url,str) and lazy_url.startswith('http'):
                            play_url = lazy_url
                else:
                    # js lazy; without a 'js:' prefix fall back to the module-level js_code
                    # NOTE(review): js_code is only bound in the __main__ block — confirm it exists when this branch runs in service mode
                    jscode = str(self.lazy).strip().replace('js:', '', 1) if str(self.lazy).startswith('js:') else js_code
                    jsp = jsoup(self.url)
                    headers['Referer'] = getHome(play_url)
                    py_ctx.update({
                        'input': play_url,
                        'oheaders': self.d.oheaders,
                        'fetch_params':{'headers':self.headers,'timeout':self.d.timeout,'encoding':self.d.encoding},
                        'd': self.d,
                        'jxs':jxs,
                        'getParse':self.d.getParse,
                        'saveParse':self.d.saveParse,
                        'jsp': jsp,
                        'jq': jsp,
                        'pdfh': self.d.jsp.pdfh,
                        'pdfa': self.d.jsp.pdfa, 'pd': self.d.jsp.pd,'play_url':self.play_url
                    })
                    ctx = py_ctx
                    jscode = getPreJs() + jscode
                    loader,_ = runJScode(jscode,ctx=ctx)
                    play_url = loader.eval('input')
                    if isinstance(play_url,JsObjectWrapper):
                        play_url = play_url.to_dict()
                    logger.info(f'js免嗅耗时:{get_interval(t1)}毫秒,播放地址:{play_url}')
                    if not play_url and play_url!='' and play_url!={}:
                        # falsy result that is not a deliberate empty str/dict: restore the original
                        play_url = origin_play_url
            except Exception as e:
                logger.info(f'免嗅耗时:{get_interval(t1)}毫秒,并发生错误:{e}')
        else:
            logger.info(f'播放重定向到:{play_url}')
        if self.play_json:
            # play_json as a non-empty list: apply the first rule whose regex matches
            if isinstance(self.play_json,list) and len(self.play_json) > 0:
                web_url = play_url if isinstance(play_url,str) else play_url.get('url')
                for pjson in self.play_json:
                    if pjson.get('re') and (pjson['re']=='*' or re.search(pjson['re'],web_url,re.S|re.M)):
                        if pjson.get('json') and isinstance(pjson['json'], dict):
                            if isinstance(play_url, str):
                                base_json = pjson['json']
                                base_json['url'] = web_url
                                play_url = base_json
                            elif isinstance(play_url, dict):
                                base_json = pjson['json']
                                play_url.update(base_json)
                        break  # stop at the first matching rule
            else:
                # no rule list: default to "needs parsing" play json
                base_json = {
                    'jx':1,  # parser on
                    'parse':1,  # sniff flag; both set to 1 for pluto compatibility
                }
                if isinstance(play_url,str):
                    base_json['url'] = play_url
                    play_url = base_json
                elif isinstance(play_url,dict):
                    play_url.update(base_json)
        logger.info(f'最终返回play_url:{play_url}')
        return play_url
if __name__ == '__main__':
    # Ad-hoc manual test of this module: urljoin sanity check, then load a js
    # rule and exercise homeContent/searchContent against a live site.
    print(urljoin('https://api.web.360kan.com/v1/f',
                  '//0img.hitv.com/preview/sp_images/2022/01/28/202201281528074643023.jpg'))
    from utils import parser
    # Read the template prelude (everything before `export`) to prepend to the rule js.
    with open('../js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    js_path = f'js/360影视.js'
    # js_code stays at module level: playContent's js-lazy fallback reads it.
    ctx, js_code = parser.runJs(js_path,before=before)
    ruleDict = ctx.rule.to_dict()
    # ruleDict['id'] would carry the route id needed later for play sniffing.
    cms = CMS(ruleDict)
    print(cms.title)
    print(cms.homeContent())
    print(cms.searchContent('独行月球'))