cms.py 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. # File : cms.py
  4. # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
  5. # Date : 2022/8/25
  6. import json
  7. # import bs4
  8. import requests
  9. import re
  10. import math
  11. import ujson
  12. from utils.web import *
  13. from utils.system import getHost
  14. from utils.config import playerConfig
  15. from utils.log import logger
  16. from utils.encode import base64Encode, base64Decode, fetch, post, request, getCryptoJS, getPreJs, buildUrl, getHome, \
  17. atob, btoa
  18. from utils.encode import verifyCode, setDetail, join, urljoin2, parseText, requireCache, forceOrder, base64ToImage, \
  19. encodeStr, decodeStr
  20. from utils.encode import md5 as mmd5
  21. from utils.safePython import safePython, safe_eval
  22. from utils.parser import runPy, runJScode, JsObjectWrapper, PyJsObject, PyJsString
  23. from utils.htmlParser import jsoup
  24. from urllib.parse import urljoin, quote, unquote
  25. from concurrent.futures import ThreadPoolExecutor # 引入线程池
  26. from flask import url_for, redirect, render_template_string
  27. from easydict import EasyDict as edict
  28. from controllers.service import storage_service
  29. def setItem(key, value):
  30. lsg = storage_service()
  31. if isinstance(key, PyJsString):
  32. key = parseText(str(key))
  33. if isinstance(value, PyJsString):
  34. value = parseText(str(value))
  35. return lsg.setItem(key, value)
  36. def getItem(key, value=''):
  37. lsg = storage_service()
  38. if isinstance(key, PyJsString):
  39. key = parseText(str(key))
  40. if isinstance(value, PyJsString):
  41. value = parseText(str(value))
  42. return lsg.getItem(key, value)
  43. def clearItem(key):
  44. lsg = storage_service()
  45. if isinstance(key, PyJsString):
  46. key = parseText(str(key))
  47. return lsg.clearItem(key)
  48. def encodeUrl(url):
  49. # return base64Encode(quote(url))
  50. # return base64Encode(url)
  51. # print(type(url))
  52. if isinstance(url, PyJsString):
  53. # obj = obj.to_dict()
  54. url = parseText(str(url))
  55. return quote(url)
  56. def stringify(obj):
  57. if isinstance(obj, PyJsObject):
  58. # obj = obj.to_dict()
  59. obj = parseText(str(obj))
  60. return json.dumps(obj, separators=(',', ':'), ensure_ascii=False)
  61. def requireObj(url):
  62. if isinstance(url, PyJsString):
  63. url = parseText(str(url))
  64. return requireCache(url)
  65. def md5(text):
  66. if isinstance(text, PyJsString):
  67. text = parseText(str(text))
  68. return mmd5(text)
# Shared execution context handed to rule js/python snippets: maps the helper
# names a rule may call (fetch, md5, storage access, UA constants, ...) to
# their python implementations. Mutated (.update) at runtime before each eval.
py_ctx = {
    'requests': requests, 'print': print, 'base64Encode': base64Encode, 'base64Decode': base64Decode,
    'log': logger.info, 'fetch': fetch, 'post': post, 'request': request, 'getCryptoJS': getCryptoJS,
    'buildUrl': buildUrl, 'getHome': getHome, 'setDetail': setDetail, 'join': join, 'urljoin2': urljoin2,
    'PC_UA': PC_UA, 'MOBILE_UA': MOBILE_UA, 'UC_UA': UC_UA, 'UA': UA, 'IOS_UA': IOS_UA,
    'setItem': setItem, 'getItem': getItem, 'clearItem': clearItem, 'stringify': stringify, 'encodeUrl': encodeUrl,
    'requireObj': requireObj, 'md5': md5, 'atob': atob, 'btoa': btoa, 'base64ToImage': base64ToImage,
    'encodeStr': encodeStr,
    'decodeStr': decodeStr
}
# print(getCryptoJS())
  80. class CMS:
    def __init__(self, rule, db=None, RuleClass=None, PlayParse=None, new_conf=None, ext=''):
        """Build a CMS spider instance from a rule definition.

        :param rule: rule dict (title/host/url/parse expressions/headers ...)
        :param db: flask-sqlalchemy style handle used for caching; may be None
        :param RuleClass: ORM model caching categories/cookies per rule
        :param PlayParse: ORM model caching sniffed play urls
        :param new_conf: optional config overrides (RETRY_CNT, OCR_API ...)
        :param ext: extension string appended to the play redirect url
        """
        if new_conf is None:
            new_conf = {}
        self.lsg = storage_service()
        self.title = rule.get('title', '')
        self.id = rule.get('id', self.title)
        # jinja2: mark the {{fl}} placeholder safe so it is not auto-escaped
        self.filter_url = rule.get('filter_url', '').replace('{{fl}}', '{{fl|safe}}')
        # NOTE(review): self.filter_url is re-assigned from the raw rule value
        # further below, which discards this '{{fl|safe}}' replacement — confirm.
        cate_exclude = rule.get('cate_exclude', '')
        tab_exclude = rule.get('tab_exclude', '')
        self.lazy = rule.get('lazy', False)
        # self.play_disable = new_conf.get('PLAY_DISABLE',False)
        self.play_disable = self.lsg.getItem('PLAY_DISABLE', False)
        self.retry_count = new_conf.get('RETRY_CNT', 3)
        # self.lazy_mode = new_conf.get('LAZYPARSE_MODE')
        self.lazy_mode = self.lsg.getItem('LAZYPARSE_MODE', 2)
        self.ocr_api = new_conf.get('OCR_API')
        # self.cate_exclude = new_conf.get('CATE_EXCLUDE','')
        self.cate_exclude = self.lsg.getItem('CATE_EXCLUDE', '')
        # self.tab_exclude = new_conf.get('TAB_EXCLUDE','')
        self.tab_exclude = self.lsg.getItem('TAB_EXCLUDE', '')
        # merge the rule's exclude pattern into the global one ('|'-separated regex)
        if cate_exclude:
            if not str(cate_exclude).startswith('|') and not str(self.cate_exclude).endswith('|'):
                self.cate_exclude = self.cate_exclude + '|' + cate_exclude
            else:
                self.cate_exclude += cate_exclude
        if tab_exclude:
            if not str(tab_exclude).startswith('|') and not str(self.tab_exclude).endswith('|'):
                self.tab_exclude = self.tab_exclude + '|' + tab_exclude
            else:
                self.tab_exclude += tab_exclude
        # print(self.cate_exclude)
        # resolve the /vod route from flask; fall back when no app context
        try:
            self.vod = redirect(url_for('vod')).headers['Location']
        except:
            self.vod = '/vod'
        # if not self.play_disable and self.lazy:
        if not self.play_disable:
            self.play_parse = rule.get('play_parse', False)
            try:
                play_url = getHost(self.lazy_mode)
            except:
                play_url = getHost(1, 5705)
            # play_url = new_conf.get('PLAY_URL',getHost(2))
            if not play_url.startswith('http'):
                play_url = 'http://' + play_url
            # print(play_url)
            if self.play_parse:
                # self.play_url = play_url + self.vod + '?play_url='
                js0_password = self.lsg.getItem('JS0_PASSWORD')
                # print(f'js0密码:{js0_password}')
                js0_password = f'pwd={js0_password}&' if js0_password else ''
                # redirect url the player will call back with the real play_url appended
                self.play_url = f'{play_url}{self.vod}?{js0_password}rule={self.id}&ext={ext}&play_url='
                # logger.info(f'cms重定向链接:{self.play_url}')
            else:
                self.play_url = ''
        else:
            self.play_parse = False
            self.play_url = ''
        logger.info('播放免嗅地址: ' + self.play_url)
        self.db = db
        self.RuleClass = RuleClass
        self.PlayParse = PlayParse
        host = rule.get('host', '').rstrip('/')
        host = unquote(host)
        HOST = host
        hostJs = rule.get('hostJs', '')
        if hostJs:
            # the rule supplies js that computes the real HOST dynamically
            try:
                jsp = jsoup(HOST)
                py_ctx.update({
                    'HOST': HOST,
                    'jsp': jsp,
                    'jq': jsp,
                    'TYPE': 'init',
                })
                ctx = py_ctx
                jscode = getPreJs() + hostJs.strip().replace('js:', '', 1)
                # print(jscode)
                loader, _ = runJScode(jscode, ctx=ctx)
                # print(loader.toString())
                HOST = loader.eval('HOST')
                # print(vods)
                # usually already a plain str
                if isinstance(HOST, PyJsString):  # JsObjectWrapper
                    HOST = parseText(str(HOST))
                host = HOST.rstrip('/')
                print('host:', host)
            except Exception as e:
                logger.info(f'执行{hostJs}获取host发生错误:{e}')
        timeout = rule.get('timeout', 5000)
        homeUrl = rule.get('homeUrl', '/')
        url = rule.get('url', '')
        detailUrl = rule.get('detailUrl', '')
        searchUrl = rule.get('searchUrl', '')
        default_headers = getHeaders(host)
        self_headers = rule.get('headers', {})
        default_headers.update(self_headers)
        headers = default_headers
        cookie = self.getCookie()
        # print(f'{self.title}cookie:{cookie}')
        self.oheaders = self_headers
        if cookie:
            headers['cookie'] = cookie
            self.oheaders['cookie'] = cookie
        limit = rule.get('limit', 6)
        encoding = rule.get('编码', 'utf-8')
        search_encoding = rule.get('搜索编码', '')
        self.limit = min(limit, 30)
        keys = headers.keys()
        # expand UA placeholders and url-style cookie values
        for k in headers.keys():
            if str(k).lower() == 'user-agent':
                v = headers[k]
                if v == 'MOBILE_UA':
                    headers[k] = MOBILE_UA
                elif v == 'PC_UA':
                    headers[k] = PC_UA
                elif v == 'UC_UA':
                    headers[k] = UC_UA
                elif v == 'IOS_UA':
                    headers[k] = IOS_UA
            elif str(k).lower() == 'cookie':
                v = headers[k]
                if v and str(v).startswith('http'):
                    # cookie value is a url: fetch it to obtain the cookie
                    try:
                        # NOTE(review): 'timeout' here is still the raw rule value
                        # (default 5000, ms) but requests treats it as seconds — confirm.
                        ck = requests.get(v, timeout=timeout, verify=False)
                        # NOTE(review): this stores the Response object itself as the
                        # header value (not ck.text or its cookies) — confirm intended.
                        headers[k] = ck
                    except Exception as e:
                        logger.info(f'从{v}获取cookie发生错误:{e}')
                        pass
        lower_keys = list(map(lambda x: x.lower(), keys))
        if not 'user-agent' in lower_keys:
            headers['User-Agent'] = UA
        if not 'referer' in lower_keys:
            headers['Referer'] = host
        self.headers = headers
        # print(headers)
        self.host = host
        self.homeUrl = urljoin(host, homeUrl) if host and homeUrl else homeUrl or host
        # '[first-page-url]' bracket syntax: join both the base and the bracketed url
        if url.find('[') > -1 and url.find(']') > -1:
            u1 = url.split('[')[0]
            u2 = url.split('[')[1].split(']')[0]
            self.url = urljoin(host, u1) + '[' + urljoin(host, u2) + ']' if host and url else url
        else:
            self.url = urljoin(host, url) if host and url else url
        if searchUrl.find('[') > -1 and searchUrl.find(']') > -1 and '#' not in searchUrl:
            u1 = searchUrl.split('[')[0]
            u2 = searchUrl.split('[')[1].split(']')[0]
            self.searchUrl = urljoin(host, u1) + '[' + urljoin(host, u2) + ']' if host and searchUrl else searchUrl
        else:
            self.searchUrl = urljoin(host, searchUrl) if host and searchUrl else searchUrl
        self.detailUrl = urljoin(host, detailUrl) if host and detailUrl else detailUrl
        self.class_name = rule.get('class_name', '')
        self.class_url = rule.get('class_url', '')
        self.class_parse = rule.get('class_parse', '')
        self.filter_name = rule.get('filter_name', '')
        self.filter_url = rule.get('filter_url', '')
        self.filter_parse = rule.get('filter_parse', '')
        self.double = rule.get('double', False)
        self.一级 = rule.get('一级', '')
        self.二级 = rule.get('二级', '')
        self.二级访问前 = rule.get('二级访问前', '')
        self.搜索 = rule.get('搜索', '')
        self.推荐 = rule.get('推荐', '')
        self.图片来源 = rule.get('图片来源', '')
        self.encoding = encoding
        self.search_encoding = search_encoding
        # rule timeout is milliseconds; store seconds for requests
        self.timeout = round(int(timeout) / 1000, 2)
        self.filter = rule.get('filter', [])
        self.filter_def = rule.get('filter_def', {})
        self.play_json = rule['play_json'] if 'play_json' in rule else []
        self.pagecount = rule['pagecount'] if 'pagecount' in rule else {}
        self.extend = rule.get('extend', [])
        self.d = self.getObject()
  254. def getName(self):
  255. return self.title
  256. def getObject(self):
  257. o = edict({
  258. 'jsp': jsoup(self.url),
  259. 'getParse': self.getParse,
  260. 'saveParse': self.saveParse,
  261. 'oheaders': self.oheaders,
  262. 'headers': self.headers, # 通用免嗅需要
  263. 'encoding': self.encoding,
  264. 'name': self.title,
  265. 'timeout': self.timeout,
  266. })
  267. return o
  268. def regexp(self, prule, text, pos=None):
  269. ret = re.search(prule, text).groups()
  270. if pos != None and isinstance(pos, int):
  271. return ret[pos]
  272. else:
  273. return ret
  274. def test(self, text, string):
  275. searchObj = re.search(rf'{text}', string, re.M | re.I)
  276. # print(searchObj)
  277. # global vflag
  278. if searchObj:
  279. # vflag = searchObj.group()
  280. pass
  281. return searchObj
  282. def blank(self):
  283. result = {
  284. 'list': []
  285. }
  286. return result
  287. def blank_vod(self):
  288. return {
  289. "vod_id": "id",
  290. "vod_name": "片名",
  291. "vod_pic": "", # 图片
  292. "type_name": "剧情",
  293. "vod_year": "年份",
  294. "vod_area": "地区",
  295. "vod_remarks": "更新信息",
  296. "vod_actor": "主演",
  297. "vod_director": "导演",
  298. "vod_content": "简介"
  299. }
  300. def jsoup(self):
  301. jsp = jsoup(self.url)
  302. pdfh = jsp.pdfh
  303. pdfa = jsp.pdfa
  304. pd = jsp.pd
  305. pjfh = jsp.pjfh
  306. pjfa = jsp.pjfa
  307. pj = jsp.pj
  308. pq = jsp.pq
  309. return pdfh, pdfa, pd, pq
  310. def getClasses(self):
  311. if not self.db:
  312. msg = '未提供数据库连接'
  313. print(msg)
  314. return []
  315. name = self.getName()
  316. # self.db.metadata.clear()
  317. # RuleClass = rule_classes.init(self.db)
  318. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  319. # _logger.info('xxxxxx')
  320. if res:
  321. if not all([res.class_name, res.class_url]):
  322. return []
  323. cls = res.class_name.split('&')
  324. cls2 = res.class_url.split('&')
  325. classes = [{'type_name': cls[i], 'type_id': cls2[i]} for i in range(len(cls))]
  326. # _logger.info(classes)
  327. logger.info(f"{self.getName()}使用缓存分类:{classes}")
  328. return classes
  329. else:
  330. return []
  331. def getCookie(self):
  332. name = self.getName()
  333. if not self.db:
  334. msg = f'{name}未提供数据库连接'
  335. print(msg)
  336. return False
  337. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  338. if res:
  339. return res.cookie or None
  340. else:
  341. return None
  342. def saveCookie(self, cookie):
  343. name = self.getName()
  344. if not self.db:
  345. msg = f'{name}未提供数据库连接'
  346. print(msg)
  347. return False
  348. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  349. if res:
  350. res.cookie = cookie
  351. self.db.session.add(res)
  352. else:
  353. res = self.RuleClass(name=name, cookie=cookie)
  354. self.db.session.add(res)
  355. try:
  356. self.db.session.commit()
  357. logger.info(f'{name}已保存cookie:{cookie}')
  358. except Exception as e:
  359. return f'保存cookie发生了错误:{e}'
  360. def saveClass(self, classes):
  361. if not self.db:
  362. msg = '未提供数据库连接'
  363. print(msg)
  364. return msg
  365. name = self.getName()
  366. class_name = '&'.join([cl['type_name'] for cl in classes])
  367. class_url = '&'.join([cl['type_id'] for cl in classes])
  368. # data = RuleClass.query.filter(RuleClass.name == '555影视').all()
  369. # self.db.metadata.clear()
  370. # RuleClass = rule_classes.init(self.db)
  371. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  372. # print(res)
  373. if res:
  374. res.class_name = class_name
  375. res.class_url = class_url
  376. self.db.session.add(res)
  377. msg = f'{self.getName()}修改成功:{res.id}'
  378. else:
  379. res = self.RuleClass(name=name, class_name=class_name, class_url=class_url)
  380. self.db.session.add(res)
  381. res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first()
  382. msg = f'{self.getName()}新增成功:{res.id}'
  383. try:
  384. self.db.session.commit()
  385. logger.info(msg)
  386. except Exception as e:
  387. return f'发生了错误:{e}'
  388. def getParse(self, play_url):
  389. if not self.db:
  390. msg = '未提供数据库连接'
  391. print(msg)
  392. return ''
  393. name = self.getName()
  394. # self.db.metadata.clear()
  395. # RuleClass = rule_classes.init(self.db)
  396. res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first()
  397. # _logger.info('xxxxxx')
  398. if res:
  399. real_url = res.real_url
  400. logger.info(f"{name}使用缓存播放地址:{real_url}")
  401. return real_url
  402. else:
  403. return ''
  404. def dealJson(self, html):
  405. try:
  406. # res = re.search('.*?\{(.*)\}',html,re.M|re.I).groups()[0]
  407. res = re.search('.*?\{(.*)\}', html, re.M | re.S).groups()[0]
  408. html = '{' + res + '}'
  409. return html
  410. except:
  411. return html
    def checkHtml(self, r):
        """Decode response *r* and transparently pass the BT-panel (宝塔) WAF check.

        When the page contains a '?btwaf=' challenge, the request is replayed
        once with the challenge token plus the response cookies; if that clears
        the challenge, the working cookie is cached via saveCookie.

        :param r: a requests.Response
        :return: the decoded html text (post-challenge when one was present)
        """
        r.encoding = self.encoding
        html = r.text
        if html.find('?btwaf=') > -1:
            # extract the waf token from the challenge page
            btwaf = re.search('btwaf(.*?)"', html, re.M | re.I).groups()[0]
            url = r.url.split('#')[0] + '?btwaf' + btwaf
            # print(f'需要过宝塔验证:{url}')
            cookies_dict = requests.utils.dict_from_cookiejar(r.cookies)
            cookie_str = ';'.join([f'{k}={cookies_dict[k]}' for k in cookies_dict])
            self.headers['cookie'] = cookie_str
            r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False)
            r.encoding = self.encoding
            html = r.text
            if html.find('?btwaf=') < 0:
                # challenge passed — persist the cookie for later requests
                self.saveCookie(cookie_str)
        # print(html)
        return html
  429. def saveParse(self, play_url, real_url):
  430. if not self.db:
  431. msg = '未提供数据库连接'
  432. print(msg)
  433. return msg
  434. name = self.getName()
  435. # data = RuleClass.query.filter(RuleClass.name == '555影视').all()
  436. # self.db.metadata.clear()
  437. # RuleClass = rule_classes.init(self.db)
  438. res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first()
  439. # print(res)
  440. if res:
  441. res.real_url = real_url
  442. self.db.session.add(res)
  443. msg = f'{name}服务端免嗅修改成功:{res.id}'
  444. else:
  445. res = self.PlayParse(play_url=play_url, real_url=real_url)
  446. self.db.session.add(res)
  447. res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first()
  448. msg = f'{name}服务端免嗅新增成功:{res.id}'
  449. try:
  450. self.db.session.commit()
  451. logger.info(msg)
  452. except Exception as e:
  453. return f'{name}发生了错误:{e}'
    def homeContent(self, fypage=1):
        """Build the home payload: categories (static, cached, or parsed) + recommend list.

        :param fypage: page number, forwarded to homeVideoContent
        :return: dict with 'class', optional 'filters', and the recommend video list
        """
        # yanaifei
        # https://yanetflix.com/vodtype/dianying.html
        t1 = time()
        result = {}
        classes = []
        video_result = self.blank()
        # static category definition straight from the rule ('&'-separated)
        if self.class_url and self.class_name:
            class_names = self.class_name.split('&')
            class_urls = self.class_url.split('&')
            cnt = min(len(class_urls), len(class_names))
            for i in range(cnt):
                classes.append({
                    'type_name': class_names[i],
                    'type_id': class_urls[i]
                })
        # print(self.url)
        print(self.headers)
        has_cache = False
        # print(self.homeUrl)
        if self.homeUrl.startswith('http'):
            # print(self.class_parse)
            try:
                # prefer categories cached in the DB over re-parsing the page
                if self.class_parse:
                    t2 = time()
                    cache_classes = self.getClasses()
                    logger.info(f'{self.getName()}读取缓存耗时:{get_interval(t2)}毫秒')
                    if len(cache_classes) > 0:
                        classes = cache_classes
                        # print(cache_classes)
                        has_cache = True
                # logger.info(f'是否有缓存分类:{has_cache}')
                # skip the network round-trip only when cached AND no recommend rule
                if has_cache and not self.推荐:
                    pass
                else:
                    new_classes = []
                    r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout, verify=False)
                    html = self.checkHtml(r)
                    # print(html)
                    # print(self.headers)
                    if self.class_parse and not has_cache:
                        # class_parse: 'list-selector;name-selector;url-selector[;id-regex]'
                        p = self.class_parse.split(';')
                        # print(p[0])
                        # print(html)
                        jsp = jsoup(self.url)
                        pdfh = jsp.pdfh
                        pdfa = jsp.pdfa
                        pd = jsp.pd
                        items = pdfa(html, p[0])
                        # print(len(items))
                        # print(items)
                        for item in items:
                            title = pdfh(item, p[1])
                            # drop categories whose title matches the exclude regex
                            if self.cate_exclude and jsp.test(self.cate_exclude, title):
                                continue
                            url = pd(item, p[2])
                            # print(url)
                            tag = url
                            if len(p) > 3 and p[3].strip():
                                # optional regex extracts the category id from the url
                                try:
                                    tag = self.regexp(p[3].strip(), url, 0)
                                except:
                                    logger.info(f'分类匹配错误:{title}对应的链接{url}无法匹配{p[3]}')
                                    continue
                            new_classes.append({
                                'type_name': title,
                                'type_id': tag
                            })
                        if len(new_classes) > 0:
                            classes.extend(new_classes)
                            self.saveClass(classes)
                    video_result = self.homeVideoContent(html, fypage)
            except Exception as e:
                logger.info(f'{self.getName()}主页发生错误:{e}')
        # final pass: apply the exclude regex to every collected category
        classes = list(
            filter(lambda x: not self.cate_exclude or not jsoup(self.url).test(self.cate_exclude, x['type_name']),
                   classes))
        result['class'] = classes
        if self.filter:
            # dict filters come from the rule, otherwise fall back to global config
            if isinstance(self.filter, dict):
                result['filters'] = self.filter
            else:
                result['filters'] = playerConfig['filter']
        result.update(video_result)
        # print(result)
        logger.info(f'{self.getName()}获取首页总耗时(包含读取缓存):{get_interval(t1)}毫秒')
        return result
    def homeVideoContent(self, html, fypage=1):
        """Parse the recommend ('推荐') section of the home page into vod items.

        The 推荐 rule is either 'js:...' (evaluated in the shared js context,
        reading VODS) or a ';'-separated selector rule, optionally two-level
        ('double') and optionally inheriting fields from the 一级 rule via '*'.

        :param html: home page html (already waf-checked by the caller)
        :param fypage: page number, only echoed into the 'no_use' stats block
        :return: dict with 'list' of vods plus a 'no_use' info block
        """
        p = self.推荐
        if not p:
            return self.blank()
        jsp = jsoup(self.homeUrl)
        result = {}
        videos = []
        is_js = isinstance(p, str) and str(p).strip().startswith('js:')  # js-driven rule
        if is_js:
            # NOTE(review): 'headers' is not defined in this method — presumably a
            # module-level name from utils.web's star import; verify.
            headers['Referer'] = getHome(self.host)
            py_ctx.update({
                'input': self.homeUrl,
                'HOST': self.host,
                'TYPE': 'home',  # 海阔 js environment flag
                'oheaders': self.d.oheaders,
                'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
                'd': self.d,
                'getParse': self.d.getParse,
                'saveParse': self.d.saveParse,
                'jsp': jsp, 'jq': jsp, 'setDetail': setDetail,
            })
            ctx = py_ctx
            jscode = getPreJs() + p.strip().replace('js:', '', 1)
            # print(jscode)
            try:
                loader, _ = runJScode(jscode, ctx=ctx)
                # print(loader.toString())
                vods = loader.eval('VODS')
                # print(vods)
                if isinstance(vods, JsObjectWrapper):
                    videos = vods.to_list()
            except Exception as e:
                logger.info(f'首页推荐执行js获取列表出错:{e}')
        else:
            # '*' means: reuse the 一级 rule as a single-level recommend rule
            if p == '*' and self.一级:
                p = self.一级
                self.double = False
                logger.info(f'首页推荐继承一级: {p}')
            p = p.strip().split(';')  # split the rule into its selector fields
            # double rules need 6 fields, single-level rules 5 — otherwise bail out
            if not self.double and len(p) < 5:
                return self.blank()
            if self.double and len(p) < 6:
                return self.blank()
            jsp = jsoup(self.homeUrl)
            pp = self.一级.split(';')

            def getPP(p, pn, pp, ppn):
                # field pn of the recommend rule, falling back to field ppn of
                # the 一级 rule when the field is '*'; '' on any error
                try:
                    ps = pp[ppn] if p[pn] == '*' and len(pp) > ppn else p[pn]
                    return ps
                except Exception as e:
                    return ''

            p0 = getPP(p, 0, pp, 0)
            is_json = str(p0).startswith('json:')
            if is_json:
                html = self.dealJson(html)
            # choose json-path or dom selector helpers accordingly
            pdfh = jsp.pjfh if is_json else jsp.pdfh
            pdfa = jsp.pjfa if is_json else jsp.pdfa
            pd = jsp.pj if is_json else jsp.pd
            # print(html)
            try:
                if self.double:
                    # two-level layout: sections (p0) each containing an item list (p1)
                    items = pdfa(html, p0.replace('json:', ''))
                    # print(p[0])
                    # print(items)
                    # print(len(items))
                    p1 = getPP(p, 1, pp, 0)
                    p2 = getPP(p, 2, pp, 1)
                    p3 = getPP(p, 3, pp, 2)
                    p4 = getPP(p, 4, pp, 3)
                    p5 = getPP(p, 5, pp, 4)
                    p6 = getPP(p, 6, pp, 5)
                    for item in items:
                        items2 = pdfa(item, p1)
                        # print(len(items2))
                        for item2 in items2:
                            try:
                                title = pdfh(item2, p2)
                                # print(title)
                                try:
                                    img = pd(item2, p3)
                                except:
                                    img = ''
                                try:
                                    desc = pdfh(item2, p4)
                                except:
                                    desc = ''
                                # with a detailUrl the link field is an id (pdfh),
                                # otherwise a full url (pd); '+' joins multiple links
                                links = [pd(item2, _p5) if not self.detailUrl else pdfh(item2, _p5) for _p5 in
                                         p5.split('+')]
                                vid = '$'.join(links)
                                if len(p) > 6 and p[6]:
                                    content = pdfh(item2, p6)
                                else:
                                    content = ''
                                # 二级 == '*': embed title/img into the vod id for later reuse
                                if self.二级 == '*':
                                    vid = vid + '@@' + title + '@@' + img
                                videos.append({
                                    "vod_id": vid,
                                    "vod_name": title,
                                    "vod_pic": img,
                                    "vod_remarks": desc,
                                    "no_use": {
                                        "vod_content": content,
                                        "type_id": 1,
                                        "type_name": "首页推荐",
                                    },
                                })
                            except:
                                pass
                else:
                    # single-level layout: items directly under the root selector
                    items = pdfa(html, p0.replace('json:', ''))
                    # print(items)
                    p1 = getPP(p, 1, pp, 1)
                    p2 = getPP(p, 2, pp, 2)
                    p3 = getPP(p, 3, pp, 3)
                    p4 = getPP(p, 4, pp, 4)
                    p5 = getPP(p, 5, pp, 5)
                    for item in items:
                        try:
                            title = pdfh(item, p1)
                            try:
                                img = pd(item, p2)
                            except:
                                img = ''
                            try:
                                desc = pdfh(item, p3)
                            except:
                                desc = ''
                            # link = pd(item, p[4])
                            links = [pd(item, _p5) if not self.detailUrl else pdfh(item, _p5) for _p5 in p4.split('+')]
                            vid = '$'.join(links)
                            if len(p) > 5 and p[5]:
                                content = pdfh(item, p5)
                            else:
                                content = ''
                            if self.二级 == '*':
                                vid = vid + '@@' + title + '@@' + img
                            videos.append({
                                "vod_id": vid,
                                "vod_name": title,
                                "vod_pic": img,
                                "vod_remarks": desc,
                                "no_use": {
                                    "vod_content": content,
                                    "type_id": 1,
                                    "type_name": "首页推荐",
                                },
                            })
                        except:
                            pass
                # result['list'] = videos[min((fypage-1)*self.limit,len(videos)-1):min(fypage*self.limit,len(videos))]
            except Exception as e:
                logger.info(f'首页内容获取失败:{e}')
                return self.blank()
        # append the picture-source suffix to absolute image urls
        if self.图片来源:
            for video in videos:
                if video.get('vod_pic', '') and str(video['vod_pic']).startswith('http'):
                    video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}"
        result['list'] = videos
        # print(videos)
        result['no_use'] = {
            'code': 1,
            'msg': '数据列表',
            'page': fypage,
            'pagecount': math.ceil(len(videos) / self.limit),
            'limit': self.limit,
            'total': len(videos),
            'now_count': len(result['list']),
        }
        # print(result)
        return result
def categoryContent(self, fyclass, fypage, fl=None):
    """
    Return level-1 (category listing) data for one category page.

    :param fyclass: category id, substituted into the url's ``fyclass`` placeholder
    :param fypage: page number, substituted into ``fypage`` (may appear inside an
                   arithmetic expression wrapped in parentheses)
    :param fl: user-selected filters; merged over this category's default filters
    :return: cms level-1 dict: {'list', 'page', 'pagecount', 'limit', 'total'},
             or a one-item placeholder payload when nothing was scraped
    """
    if fl is None:
        fl = {}
    # Merge the rule's default filters for this category under the user's
    # choices (fl is applied last, so user values win).
    if self.filter_def and isinstance(self.filter_def, dict):
        try:
            if self.filter_def.get(fyclass) and isinstance(self.filter_def[fyclass], dict):
                self_filter_def = self.filter_def[fyclass]
                # round-trip through json to deep-copy defaults before mutating
                filter_def = ujson.loads(ujson.dumps(self_filter_def))
                filter_def.update(fl)
                fl = filter_def
        except Exception as e:
            print(f'合并不同分类对应的默认筛选出错:{e}')
    result = {}
    t1 = time()
    pg = str(fypage)
    url = self.url.replace('fyclass', fyclass)
    # 'prefix[page1-url]' syntax: the bracketed url serves page 1, the part
    # before '[' serves pages > 1.
    if fypage == 1 and self.test('[\[\]]', url):
        url = url.split('[')[1].split(']')[0]
    elif fypage > 1 and self.test('[\[\]]', url):
        url = url.split('[')[0]
    if self.filter_url:
        if not 'fyfilter' in url:  # case 1: no fyfilter keyword — append filter params to the url
            if not url.endswith('&') and not self.filter_url.startswith('&'):
                url += '&'
            url += self.filter_url
        else:  # case 2: substitute the fyfilter keyword, e.g. for ----fypage.html style urls
            url = url.replace('fyfilter', self.filter_url)
        # render the jinja-style template with the merged filter dict
        url = render_template_string(url, fl=fl)
    if url.find('fypage') > -1:
        if '(' in url and ')' in url:
            # '(expr)' means the page slot is an arithmetic expression such as
            # (fypage-1)*20 — extract it, substitute the page, evaluate safely.
            url_rep = re.search('.*?\((.*)\)', url, re.M | re.S).groups()[0]
            cnt_page = url_rep.replace('fypage', pg)
            cnt_ctx = {}
            safe_eval(f'cnt_pg={cnt_page}', cnt_ctx)
            cnt_pg = str(cnt_ctx['cnt_pg']) if cnt_ctx.get('cnt_pg') else 1  # expression result
            url = url.replace(url_rep, str(cnt_pg)).replace('(', '').replace(')', '')
        else:
            url = url.replace('fypage', pg)
    logger.info(url)
    p = self.一级
    jsp = jsoup(self.url)
    videos = []
    is_js = isinstance(p, str) and str(p).startswith('js:')  # rule body is a js snippet
    if is_js:
        # Build the hiker-style js context and let the snippet produce VODS.
        headers['Referer'] = getHome(url)
        py_ctx.update({
            'input': url,
            'TYPE': 'cate',  # hiker js environment marker
            'oheaders': self.d.oheaders,
            'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
            'd': self.d,
            'MY_CATE': fyclass,  # category id
            'MY_FL': fl,  # filters
            'MY_PAGE': fypage,  # page number
            'detailUrl': self.detailUrl or '',  # detail page link template
            'getParse': self.d.getParse,
            'saveParse': self.d.saveParse,
            'jsp': jsp, 'jq': jsp, 'setDetail': setDetail,
        })
        ctx = py_ctx
        jscode = getPreJs() + p.replace('js:', '', 1)
        loader, _ = runJScode(jscode, ctx=ctx)
        vods = loader.eval('VODS')
        if isinstance(vods, JsObjectWrapper):
            videos = vods.to_list()
    else:
        # Text rule: 'list;title;img;desc;link[;content]' selector segments.
        p = p.split(';')
        if len(p) < 5:
            return self.blank()
        is_json = str(p[0]).startswith('json:')
        pdfh = jsp.pjfh if is_json else jsp.pdfh
        pdfa = jsp.pjfa if is_json else jsp.pdfa
        pd = jsp.pj if is_json else jsp.pd
        items = []
        try:
            r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False)
            html = self.checkHtml(r)
            print(self.headers)
            if is_json:
                html = self.dealJson(html)
                html = json.loads(html)
            items = pdfa(html, p[0].replace('json:', '', 1))
        except:
            # best-effort: a failed fetch/parse just yields an empty listing
            pass
        for item in items:
            try:
                title = pdfh(item, p[1])
                img = pd(item, p[2])
                desc = pdfh(item, p[3])
                # '+'-joined link sub-rules produce a '$'-joined composite link
                links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')]
                link = '$'.join(links)
                content = '' if len(p) < 6 else pdfh(item, p[5])
                vod_id = f'{fyclass}${link}' if self.detailUrl else link  # category, play link
                if self.二级 == '*':
                    # no real detail page: smuggle name and pic through the id
                    vod_id = vod_id + '@@' + title + '@@' + img
                videos.append({
                    "vod_id": vod_id,
                    "vod_name": title,
                    "vod_pic": img,
                    "vod_remarks": desc,
                    "vod_content": content,
                })
            except Exception as e:
                print(f'发生了错误:{e}')
                pass
    if self.图片来源:
        # append the picture-source suffix to absolute image urls only
        for video in videos:
            if video.get('vod_pic', '') and str(video['vod_pic']).startswith('http'):
                video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}"
    print('videos:', videos)
    limit = 40
    # fake large total so clients keep requesting further pages while data flows
    cnt = 9999 if len(videos) > 0 else 0
    pagecount = 0
    if self.pagecount and isinstance(self.pagecount, dict) and fyclass in self.pagecount:
        print(f'fyclass:{fyclass},self.pagecount:{self.pagecount}')
        pagecount = int(self.pagecount[fyclass])
    result['list'] = videos
    result['page'] = fypage
    result['pagecount'] = pagecount or max(cnt, fypage)
    result['limit'] = limit
    result['total'] = cnt
    logger.info(
        f'{self.getName()}获取分类{fyclass}第{fypage}页耗时:{get_interval(t1)}毫秒,共计{round(len(str(result)) / 1000, 2)} kb')
    # placeholder returned when the scrape produced nothing, so clients stop
    # issuing endless follow-up page requests
    nodata = {
        'list': [{'vod_name': '无数据,防无限请求', 'vod_id': 'no_data', 'vod_remarks': '不要点,会崩的',
                  'vod_pic': 'https://ghproxy.net/https://raw.githubusercontent.com/hjdhnx/dr_py/main/404.jpg'}],
        'total': 1, 'pagecount': 1, 'page': 1, 'limit': 1
    }
    return result if len(result['list']) > 0 else nodata
def 二级渲染(self, parse_str: 'str|dict', **kwargs):
    """
    Render level-2 (detail page) data from a parse rule.

    :param parse_str: the 二级 rule — '*' means "no detail page, sniff-play the
                      level-1 link directly"; otherwise a dict of per-field
                      parse expressions (or js snippets)
    Keyword args:
        detailUrl : vod id with any '@@' extras stripped
        orId      : original vod id (may carry 'id@@name@@pic' extras)
        url       : resolved detail-page url
        vod       : vod dict to fill; defaults to a blank one
        html      : pre-fetched page source (fetched here when empty)
        show_name : when truthy, prefix '(rule id)' into vod_content (drpy source tag)
        jsp       : jsoup parser bound to the appropriate base url
        fyclass   : category the item was opened from
    :return: the filled vod dict
    """
    p = parse_str  # level-2 parse expression: str ('*'/'js:...') or dict
    detailUrl = kwargs.get('detailUrl', '')
    orId = kwargs.get('orId', '')
    url = kwargs.get('url', '')
    vod = kwargs.get('vod', self.blank_vod())
    html = kwargs.get('html', '')
    show_name = kwargs.get('show_name', '')
    jsp = kwargs.get('jsp', '')
    fyclass = kwargs.get('fyclass', '')
    play_url = self.play_url
    vod_name = '片名'
    vod_pic = ''
    if self.二级 == '*':
        # level-1 smuggled 'id@@name@@pic' through the vod id — unpack it
        extra = orId.split('@@')
        vod_name = extra[1] if len(extra) > 1 else vod_name
        vod_pic = extra[2] if len(extra) > 2 else vod_pic
    if self.play_json:
        play_url = play_url.replace('&play_url=', '&type=json&play_url=')
    if p == '*':  # rule '*': level-1 link is played directly via sniffing
        vod['vod_play_from'] = '道长在线'
        vod['vod_remarks'] = detailUrl
        vod['vod_actor'] = '没有二级,只有一级链接直接嗅探播放'
        vod['vod_content'] = url
        vod['vod_id'] = orId
        vod['vod_name'] = vod_name
        vod['vod_pic'] = vod_pic
        vod['vod_play_url'] = '嗅探播放$' + play_url + url.split('@@')[0]
    elif not p or (not isinstance(p, dict) and not isinstance(p, str)) or (
            isinstance(p, str) and not str(p).startswith('js:')):
        # unusable rule (empty / wrong type / plain non-js string): leave vod as-is
        pass
    else:
        is_json = p.get('is_json', False) if isinstance(p, dict) else False  # dict rule may opt into json parsing
        pdfh = jsp.pjfh if is_json else jsp.pdfh
        pdfa = jsp.pjfa if is_json else jsp.pdfa
        pd = jsp.pj if is_json else jsp.pd
        pq = jsp.pq
        vod['vod_id'] = orId
        if not html:  # no html passed in — fetch the detail page ourselves
            r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False)
            html = self.checkHtml(r)
            if is_json:
                html = self.dealJson(html)
                html = json.loads(html)
        tt1 = time()
        if p.get('title'):
            # 'name;type' — second segment optional
            p1 = p['title'].split(';')
            vod['vod_name'] = pdfh(html, p1[0]).replace('\n', ' ').strip()
            vod['type_name'] = pdfh(html, p1[1]).replace('\n', ' ').strip() if len(p1) > 1 else ''
        if p.get('desc'):
            # 'remarks;year;area;actor;director' — all optional after the first
            try:
                p1 = p['desc'].split(';')
                vod['vod_remarks'] = pdfh(html, p1[0]).replace('\n', '').strip()
                vod['vod_year'] = pdfh(html, p1[1]).replace('\n', ' ').strip() if len(p1) > 1 else ''
                vod['vod_area'] = pdfh(html, p1[2]).replace('\n', ' ').strip() if len(p1) > 2 else ''
                vod['vod_actor'] = pdfh(html, p1[3]).replace('\n', ' ').strip() if len(p1) > 3 else ''
                vod['vod_director'] = pdfh(html, p1[4]).replace('\n', ' ').strip() if len(p1) > 4 else ''
            except:
                pass
        if p.get('content'):
            p1 = p['content'].split(';')
            try:
                content = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1])
                vod['vod_content'] = content
            except:
                pass
        if p.get('img'):
            p1 = p['img']
            try:
                img = pd(html, p1)
                vod['vod_pic'] = img
            except Exception as e:
                logger.info(f'二级图片定位失败,但不影响使用{e}')
        vod_play_from = '$$$'  # separator joining the source (line) names
        playFrom = []
        init_flag = {'ctx': False}  # dict so the closure below can mutate it

        def js_pre():
            # Lazily build the shared js context once; reused by the
            # 重定向 / tabs / lists js branches below.
            headers['Referer'] = getHome(url)
            py_ctx.update({
                'input': url,
                'html': html,
                'TYPE': 'detail',  # hiker js environment marker
                'MY_CATE': fyclass,  # category id
                'oheaders': self.d.oheaders,
                'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
                'd': self.d,
                'getParse': self.d.getParse,
                'saveParse': self.d.saveParse,
                'jsp': jsp, 'jq': jsp, 'setDetail': setDetail, 'play_url': play_url
            })
            init_flag['ctx'] = True

        if p.get('重定向') and str(p['重定向']).startswith('js:'):
            # optional js hook that can replace the page html (redirect step)
            if not init_flag['ctx']:
                js_pre()
            ctx = py_ctx
            rcode = p['重定向'].replace('js:', '', 1)
            jscode = getPreJs() + rcode
            loader, _ = runJScode(jscode, ctx=ctx)
            logger.info(f'开始执行二级重定向代码:{rcode}')
            html = loader.eval('html')
            # NOTE(review): condition tests `vod`, not the freshly-evaluated
            # `html` — looks like it should test the eval result; kept as-is.
            if isinstance(vod, JsObjectWrapper):
                html = str(html)
        if p.get('tabs'):
            vodHeader = []
            if str(p['tabs']).startswith('js:'):
                # js rule evaluates TABS into a list of line names
                if not init_flag['ctx']:
                    js_pre()
                ctx = py_ctx
                rcode = p['tabs'].replace('js:', '', 1)
                jscode = getPreJs() + rcode
                loader, _ = runJScode(jscode, ctx=ctx)
                logger.info(f'开始执行tabs代码:{rcode}')
                vHeader = loader.eval('TABS')
                # NOTE(review): same suspicious `vod` check as above — presumably
                # meant to test vHeader; kept as-is.
                if isinstance(vod, JsObjectWrapper):
                    vHeader = vHeader.to_list()
                vodHeader = vHeader
            else:
                # selector rule: first segment locates the tab elements
                tab_parse = p['tabs'].split(';')[0]
                vHeader = pdfa(html, tab_parse)
                print(f'二级线路定位列表数:{len((vHeader))}')
                from lxml.html import tostring as html2str
                tab_text = p.get('tab_text', '') or 'body&&Text'
                if not is_json:
                    for v in vHeader:
                        # extract each line title and drop excluded ones
                        v_title = pdfh(v, tab_text).strip()
                        if self.tab_exclude and jsp.test(self.tab_exclude, v_title):
                            continue
                        vodHeader.append(v_title)
                else:
                    vodHeader = vHeader
                print(f'过滤后真实线路列表数:{len((vodHeader))} {vodHeader}')
        else:
            vodHeader = ['道长在线']
        # De-duplicate line names by suffixing a running counter (a, a1, a2, ...).
        new_map = {}
        for v in vodHeader:
            if not v in new_map:
                new_map[v] = 1
            else:
                new_map[v] += 1
            if new_map[v] > 1:
                v = f'{v}{new_map[v] - 1}'
            playFrom.append(v)
        vod_play_from = vod_play_from.join(playFrom)
        vod_play_url = '$$$'  # separator joining one episode list per line
        vod_tab_list = []
        if p.get('lists'):
            if str(p['lists']).startswith('js:'):
                # js rule evaluates LISTS: one list of '名称$链接' per line
                if not init_flag['ctx']:
                    js_pre()
                ctx = py_ctx
                ctx['TABS'] = vodHeader  # pass the line list into the js env
                rcode = p['lists'].replace('js:', '', 1)
                jscode = getPreJs() + rcode
                loader, _ = runJScode(jscode, ctx=ctx)
                logger.info(f'开始执行lists代码:{rcode}')
                vlists = loader.eval('LISTS')
                # NOTE(review): `vod` check again — presumably meant vlists; kept as-is.
                if isinstance(vod, JsObjectWrapper):
                    vlists = vlists.to_list()
                for i in range(len(vlists)):
                    try:
                        # keep only '名称$链接' (first two '$'-segments) per episode
                        vlists[i] = list(map(lambda x: '$'.join(x.split('$')[:2]), vlists[i]))
                    except Exception as e:
                        logger.info(f'LISTS格式化发生错误:{e}')
                vod_play_url = vod_play_url.join(list(map(lambda x: '#'.join(x), vlists)))
            else:
                list_text = p.get('list_text', '') or 'body&&Text'
                list_url = p.get('list_url', '') or 'a&&href'
                print('list_text:' + list_text)
                print('list_url:' + list_url)
                is_tab_js = p['tabs'].strip().startswith('js:')
                for i in range(len(vodHeader)):
                    tab_name = str(vodHeader[i])
                    # optional second tabs segment refines per-line extraction
                    tab_ext = p['tabs'].split(';')[1] if len(p['tabs'].split(';')) > 1 and not is_tab_js else ''
                    # '#idv' -> line name, '#id' -> line index, in both rules
                    p1 = p['lists'].replace('#idv', tab_name).replace('#id', str(i))
                    tab_ext = tab_ext.replace('#idv', tab_name).replace('#id', str(i))
                    vodList = pdfa(html, p1)  # episode elements for one line
                    if self.play_parse:  # auto base64-encode the target url
                        vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + play_url + encodeUrl(i)
                                   for i
                                   in vodList] if is_json else \
                            [pdfh(i, list_text) + '$' + play_url + encodeUrl(pd(i, list_url)) for i in
                             vodList]  # join as 名称$链接
                    else:
                        vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + play_url + i for i in
                                   vodList] if is_json else \
                            [pdfh(i, list_text) + '$' + play_url + pd(i, list_url) for i in vodList]  # join as 名称$链接
                    vodList = forceOrder(vodList, option=lambda x: x.split('$')[0])
                    vlist = '#'.join(vodList)  # join the episodes of one line
                    vod_tab_list.append(vlist)
                vod_play_url = vod_play_url.join(vod_tab_list)
        vod_play_url_str = vod_play_url[:min(len(vod_play_url), 500)]  # truncate for logging only
        print(vod_play_url_str)
        vod['vod_play_from'] = vod_play_from
        vod['vod_play_url'] = vod_play_url
        logger.info(
            f'{self.getName()}仅二级渲染{len(vod_play_url.split("$$$")[0].split("$"))}集耗时:{get_interval(tt1)}毫秒,共计{round(len(str(vod)) / 1000, 2)} kb')
    if show_name:
        vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}'
    return vod
def detailOneVod(self, id, fyclass='', show_name=False):
    """
    Resolve one detail page (level-2) into a vod dict.

    :param id: vod id from level-1 (note: shadows the builtin ``id``; kept for
               interface compatibility) — may carry 'id@@name@@pic' extras
    :param fyclass: category the item belongs to ('' when unknown)
    :param show_name: when truthy, prefix '(rule id)' into vod_content
    :return: the vod dict (blank-vod on failure, with vod_id restored)
    """
    vod = self.blank_vod()
    orId = str(id)
    orUrl = orId
    if fyclass:
        orUrl = f'{fyclass}${orId}'
    detailUrl = orId.split('@@')[0]  # strip smuggled name/pic extras
    # Resolve the real detail url: template substitution for bare ids,
    # homeUrl join for relative paths, else use the id as the url directly.
    if not detailUrl.startswith('http') and not '/' in detailUrl:
        url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass', fyclass)
    elif '/' in detailUrl:
        url = urljoin(self.homeUrl, detailUrl)
    else:
        url = detailUrl
    if self.二级访问前:
        # optional js hook run before visiting the detail page; may rewrite MY_URL
        logger.info(f'尝试在二级访问前执行代码: {self.二级访问前}')
        py_ctx.update({
            'MY_URL': url,
            'oheaders': self.d.oheaders,
            'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
            'd': self.d,
        })
        ctx = py_ctx
        jscode = getPreJs() + self.二级访问前.replace('js:', '', 1)
        loader, _ = runJScode(jscode, ctx=ctx)
        try:
            MY_URL = loader.eval('MY_URL')
            if isinstance(MY_URL, JsObjectWrapper):
                MY_URL = str(MY_URL)
            if MY_URL:
                url = MY_URL
        except Exception as e:
            logger.info(f'执行二级访问前发生错误: {e}')
    logger.info(f'进入详情页: {url}')
    try:
        p = self.二级  # level-2 parse rule
        jsp = jsoup(url) if url.startswith('http') else jsoup(self.url)
        is_js = isinstance(p, str) and str(p).startswith('js:')  # rule is a js snippet
        if is_js:
            headers['Referer'] = getHome(url)
            play_url = self.play_url
            if self.play_json:
                play_url = play_url.replace('&play_url=', '&type=json&play_url=')
            py_ctx.update({
                'input': url,
                'TYPE': 'detail',  # hiker js environment marker
                '二级': self.二级渲染,  # level-2 render function, usable on dict rules from js
                'MY_CATE': fyclass,  # category id
                'oheaders': self.d.oheaders,
                'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
                'd': self.d,
                'getParse': self.d.getParse,
                'saveParse': self.d.saveParse,
                'jsp': jsp, 'jq': jsp, 'setDetail': setDetail, 'play_url': play_url
            })
            ctx = py_ctx
            jscode = getPreJs() + p.replace('js:', '', 1)
            loader, _ = runJScode(jscode, ctx=ctx)
            vod = loader.eval('VOD')
            if isinstance(vod, JsObjectWrapper):
                vod = vod.to_dict()
                if show_name:
                    vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}'
            else:
                # js did not yield a usable VOD object — fall back to a blank vod
                vod = self.blank_vod()
        else:
            vod = self.二级渲染(p, detailUrl=detailUrl, orId=orUrl, url=url, vod=vod, show_name=show_name, jsp=jsp,
                            fyclass=fyclass)
    except Exception as e:
        logger.info(f'{self.getName()}获取单个详情页{detailUrl}出错{e}')
    if self.图片来源:
        # append the picture-source suffix to absolute image urls
        if vod.get('vod_pic', '') and str(vod['vod_pic']).startswith('http'):
            vod['vod_pic'] = f"{vod['vod_pic']}{self.图片来源}"
    # ensure vod_id round-trips unchanged (clients key on the exact id)
    if not vod.get('vod_id') or ('$' in orUrl and vod['vod_id'] != orUrl):
        vod['vod_id'] = orUrl
    return vod
  1212. def detailContent(self, fypage, array, show_name=False):
  1213. """
  1214. cms二级数据
  1215. :param array:
  1216. :return:
  1217. """
  1218. # print('进入二级')
  1219. t1 = time()
  1220. array = array if len(array) <= self.limit else array[
  1221. (fypage - 1) * self.limit:min(self.limit * fypage, len(array))]
  1222. thread_pool = ThreadPoolExecutor(min(self.limit, len(array))) # 定义线程池来启动多线程执行此任务
  1223. obj_list = []
  1224. try:
  1225. for vod_url in array:
  1226. print(vod_url)
  1227. vod_class = ''
  1228. if vod_url.find('$') > -1:
  1229. tmp = vod_url.split('$')
  1230. vod_class = tmp[0]
  1231. vod_url = tmp[1]
  1232. obj = thread_pool.submit(self.detailOneVod, vod_url, vod_class, show_name)
  1233. obj_list.append(obj)
  1234. thread_pool.shutdown(wait=True) # 等待所有子线程并行完毕
  1235. vod_list = [obj.result() for obj in obj_list]
  1236. result = {
  1237. 'list': vod_list
  1238. }
  1239. logger.info(
  1240. f'{self.getName()}获取详情页耗时:{get_interval(t1)}毫秒,共计{round(len(str(result)) / 1000, 2)} kb')
  1241. except Exception as e:
  1242. result = {
  1243. 'list': []
  1244. }
  1245. logger.info(f'{self.getName()}获取详情页耗时:{get_interval(t1)}毫秒,发生错误:{e}')
  1246. # print(result)
  1247. return result
def searchContent(self, key, fypage=1, show_name=False):
    """
    Run a keyword search against the rule's searchUrl and return a vod list.

    :param key: search keyword (re-encoded when the rule declares a non-utf8 charset)
    :param fypage: page number substituted into ``fypage``
    :param show_name: when truthy, prefix the rule id into each result's name/id
    :return: dict with a ``list`` key of vod dicts
    """
    # Re-encode the keyword for sites that expect gbk/gb2312 etc.
    if self.search_encoding:
        if str(self.search_encoding).lower() != 'utf-8':
            key = encodeStr(key, self.search_encoding)
    elif self.encoding and str(self.encoding).startswith('gb'):
        key = encodeStr(key, self.encoding)
    pg = str(fypage)
    if not self.searchUrl:
        return self.blank()
    url = self.searchUrl.replace('**', key)
    # 'prefix[page1-url]' syntax as in categoryContent; skipped when the url
    # carries a '#' (used below for post params).
    if fypage == 1 and self.test('[\[\]]', url) and '#' not in url:
        url = url.split('[')[1].split(']')[0]
    elif fypage > 1 and self.test('[\[\]]', url) and '#' not in url:
        url = url.split('[')[0]
    if url.find('fypage') > -1:
        if '(' in url and ')' in url:
            # '(expr)' page arithmetic — substitute and evaluate safely
            url_rep = re.search('.*?\((.*)\)', url, re.M | re.S).groups()[0]
            cnt_page = url_rep.replace('fypage', pg)
            cnt_ctx = {}
            safe_eval(f'cnt_pg={cnt_page}', cnt_ctx)
            cnt_pg = str(cnt_ctx['cnt_pg']) if cnt_ctx.get('cnt_pg') else 1  # expression result
            url = url.replace(url_rep, str(cnt_pg)).replace('(', '').replace(')', '')
        else:
            url = url.replace('fypage', pg)
    logger.info(f'{self.getName()}搜索链接:{url}')
    if not self.搜索:
        return self.blank()
    # rule '*' reuses the level-1 rule; pp keeps the level-1 segments so '*'
    # placeholders inside the search rule can fall back to them (see getPP)
    p = self.一级 if self.搜索 == '*' and self.一级 else self.搜索
    pp = self.一级.split(';')
    jsp = jsoup(url) if url.startswith('http') else jsoup(self.url)
    videos = []
    is_js = isinstance(p, str) and str(p).startswith('js:')  # rule is a js snippet

    def getPP(p, pn, pp, ppn):
        # segment pn of the search rule, falling back to segment ppn of the
        # level-1 rule when the search segment is '*'
        try:
            ps = pp[ppn] if p[pn] == '*' and len(pp) > ppn else p[pn]
            return ps
        except:
            return ''

    if is_js:
        headers['Referer'] = getHome(url)
        py_ctx.update({
            'input': url,
            'oheaders': self.d.oheaders,
            'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding},
            'd': self.d,
            'MY_PAGE': fypage,
            'KEY': key,  # search keyword
            'TYPE': 'search',  # hiker js environment marker
            'detailUrl': self.detailUrl or '',
            # detail page link template
            'getParse': self.d.getParse,
            'saveParse': self.d.saveParse,
            'jsp': jsp, 'jq': jsp, 'setDetail': setDetail,
        })
        ctx = py_ctx
        jscode = getPreJs() + p.replace('js:', '', 1)
        loader, _ = runJScode(jscode, ctx=ctx)
        vods = loader.eval('VODS')
        if isinstance(vods, JsObjectWrapper):
            videos = vods.to_list()
    else:
        p = p.split(';')
        if len(p) < 5:
            return self.blank()
        is_json = str(p[0]).startswith('json:')
        pdfh = jsp.pjfh if is_json else jsp.pdfh
        pdfa = jsp.pjfa if is_json else jsp.pdfa
        pd = jsp.pj if is_json else jsp.pd
        pq = jsp.pq
        try:
            # 'url;post' / 'url;postjson' suffix selects the request method
            req_method = url.split(';')[1].lower() if len(url.split(';')) > 1 else 'get'
            if req_method == 'post':
                # 'url#k1=v1&k2=v2' — '#' separates url from form params
                rurls = url.split(';')[0].split('#')
                rurl = rurls[0]
                params = rurls[1] if len(rurls) > 1 else ''
                print(f'rurl:{rurl},params:{params}')
                new_dict = {}
                new_tmp = params.split('&')
                for i in new_tmp:
                    new_dict[i.split('=')[0]] = i.split('=')[1]
                data = new_dict
                logger.info(self.headers)
                r = requests.post(rurl, headers=self.headers, data=data, timeout=self.timeout, verify=False)
            elif req_method == 'postjson':
                # 'url#{json}' — '#' separates url from a json body
                rurls = url.split(';')[0].split('#')
                rurl = rurls[0]
                params = rurls[1] if len(rurls) > 1 else '{}'
                headers_cp = self.headers.copy()
                headers_cp.update({'Content-Type': 'application/json'})
                try:
                    # normalize/validate the json body; fall back to '{}'
                    params = ujson.dumps(ujson.loads(params))
                except:
                    params = '{}'
                logger.info(headers_cp)
                logger.info(params)
                r = requests.post(rurl, headers=headers_cp, data=params, timeout=self.timeout, verify=False)
            else:
                r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False)
            html = self.checkHtml(r)
            if is_json:
                html = self.dealJson(html)
                html = json.loads(html)
            # captcha wall: try to OCR a verification code, save the cookie,
            # and re-issue the search request
            if not is_json and re.search('系统安全验证|输入验证码', html, re.M | re.S):
                cookie = verifyCode(url, self.headers, self.timeout, self.retry_count, self.ocr_api)
                if not cookie:
                    return {
                        'list': videos
                    }
                self.saveCookie(cookie)
                self.headers['cookie'] = cookie
                r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False)
                r.encoding = self.encoding
                html = r.text
            if not show_name and not str(html).find(key) > -1:
                logger.info('搜索结果源码未包含关键字,疑似搜索失败,正为您打印结果源码')
                print(html)
            p0 = getPP(p, 0, pp, 0)
            items = pdfa(html, p0.replace('json:', '', 1))
            videos = []
            p1 = getPP(p, 1, pp, 1)
            p2 = getPP(p, 2, pp, 2)
            p3 = getPP(p, 3, pp, 3)
            p4 = getPP(p, 4, pp, 4)
            p5 = getPP(p, 5, pp, 5)
            for item in items:
                try:
                    # '||'-joined title sub-rules are concatenated
                    title = ''.join([pdfh(item, i) for i in p1.split('||')])
                    try:
                        img = pd(item, p2)
                    except:
                        img = ''
                    try:
                        desc = pdfh(item, p3)
                    except:
                        desc = ''
                    if len(p) > 5 and p[5]:
                        content = pdfh(item, p5)
                    else:
                        content = ''
                    # '+'-joined link sub-rules produce a '$'-joined composite link
                    links = [pd(item, _p4) if not self.detailUrl else pdfh(item, _p4) for _p4 in p4.split('+')]
                    link = '$'.join(links)
                    vod_id = link
                    if self.二级 == '*':
                        # no detail page: smuggle name and pic through the id
                        vod_id = vod_id + '@@' + title + '@@' + img
                    videos.append({
                        "vod_id": vod_id,
                        "vod_name": title,
                        "vod_pic": img,
                        "vod_remarks": desc,
                        "vod_content": content,  # unused field
                    })
                except Exception as e:
                    print(f'搜索列表解析发生错误:{e}')
                    pass
        except Exception as e:
            logger.info(f'搜索{self.getName()}发生错误:{e}')
    if self.图片来源:
        # append the picture-source suffix to absolute image urls
        for video in videos:
            if video.get('vod_pic', '') and str(video['vod_pic']).startswith('http'):
                video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}"
    if show_name and len(videos) > 0:
        # tag each result with the rule id so aggregated search can route back
        for video in videos:
            video['vod_name'] = self.id + ' ' + video['vod_name']
            video['vod_rule'] = self.id
            video['vod_id'] = video['vod_id'] + '#' + self.id
    result = {
        'list': videos
    }
    return result
def playContent(self, play_url, jxs=None, flag=None):
    """
    Resolve a playable url, optionally running the rule's lazy (sniff-free)
    code, and optionally wrapping the result in a play_json dict.

    :param play_url: raw play link from level-2 (may be base64-encoded)
    :param jxs: list of parsers passed into the js environment
    :param flag: only present for type-4 sources; ignored here
    :return: final play url — a str, or a dict when play_json applies
    """
    # Note: videos matched by global flags never reach the lazy code here;
    # they are intercepted upstream and routed to a parser instead.
    if not jxs:
        jxs = []
    if play_url.find('http') == -1:  # string looks encoded
        try:
            play_url = base64Decode(play_url)  # best-effort base64 decode
        except:
            pass
    play_url = unquote(play_url)
    origin_play_url = play_url  # kept to restore on a falsy lazy result
    print(origin_play_url)
    if self.lazy:
        print(f'{play_url}->开始执行免嗅代码{type(self.lazy)}->{self.lazy}')
        t1 = time()
        try:
            if type(self.lazy) == JsObjectWrapper:
                # wrapped js object cannot be executed as text — skip lazy
                logger.info(f'lazy非纯文本免嗅失败耗时:{get_interval(t1)}毫秒,播放地址:{play_url}')
            elif str(self.lazy).startswith('py:'):
                # python lazy: load the code and call its lazyParse(play_url, d)
                pycode = runPy(self.lazy)
                if pycode:
                    pos = pycode.find('def lazyParse')
                    if pos < 0:
                        return play_url
                    pyenv = safePython(self.lazy, pycode[pos:])
                    lazy_url = pyenv.action_task_exec('lazyParse', [play_url, self.d])
                    logger.info(f'py免嗅耗时:{get_interval(t1)}毫秒,播放地址:{lazy_url}')
                    if isinstance(lazy_url, str) and lazy_url.startswith('http'):
                        play_url = lazy_url
            else:
                # js lazy: inline 'js:' code, or fall back to the module-level
                # js_code (presumably set when the rule file was loaded — the
                # __main__ block below binds it; verify for other entry points)
                jscode = str(self.lazy).strip().replace('js:', '', 1) if str(self.lazy).startswith(
                    'js:') else js_code
                jsp = jsoup(self.url)
                headers['Referer'] = getHome(play_url)
                py_ctx.update({
                    'input': play_url,
                    'oheaders': self.d.oheaders,
                    'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout,
                                     'encoding': self.d.encoding},
                    'd': self.d,
                    'jxs': jxs,
                    'getParse': self.d.getParse,
                    'saveParse': self.d.saveParse,
                    'jsp': jsp,
                    'jq': jsp,
                    'pdfh': self.d.jsp.pdfh,
                    'pdfa': self.d.jsp.pdfa, 'pd': self.d.jsp.pd, 'play_url': self.play_url
                })
                ctx = py_ctx
                jscode = getPreJs() + jscode
                loader, _ = runJScode(jscode, ctx=ctx)
                play_url = loader.eval('input')
                if isinstance(play_url, JsObjectWrapper):
                    play_url = play_url.to_dict()
                logger.info(f'js免嗅耗时:{get_interval(t1)}毫秒,播放地址:{play_url}')
                # falsy-but-not-empty result (e.g. None): restore the original url
                if not play_url and play_url != '' and play_url != {}:
                    play_url = origin_play_url
        except Exception as e:
            logger.info(f'免嗅耗时:{get_interval(t1)}毫秒,并发生错误:{e}')
    else:
        logger.info(f'播放重定向到:{play_url}')
    if self.play_json:
        # play_json given as a non-empty list of {'re', 'json'} matchers
        if isinstance(self.play_json, list) and len(self.play_json) > 0:
            # extract the bare link whether play_url is a str or a dict
            web_url = play_url if isinstance(play_url, str) else play_url.get('url')
            for pjson in self.play_json:
                if pjson.get('re') and (pjson['re'] == '*' or re.search(pjson['re'], web_url, re.S | re.M)):
                    if pjson.get('json') and isinstance(pjson['json'], dict):
                        if isinstance(play_url, str):
                            base_json = pjson['json']
                            base_json['url'] = web_url
                            play_url = base_json
                        elif isinstance(play_url, dict):
                            base_json = pjson['json']
                            play_url.update(base_json)
                    # first matching pattern wins, valid json or not
                    break
        else:  # bare play_json: default to "needs parsing" (jx=1, parse=1)
            base_json = {
                'jx': 1,  # parser on
                'parse': 1,  # sniffing flag; pluto needs both set to 1
            }
            if isinstance(play_url, str):
                base_json['url'] = play_url
                play_url = base_json
            elif isinstance(play_url, dict):
                play_url.update(base_json)
    logger.info(f'最终返回play_url:{play_url}')
    return play_url
if __name__ == '__main__':
    # Ad-hoc smoke test: urljoin keeps protocol-relative image urls intact.
    print(urljoin('https://api.web.360kan.com/v1/f',
                  '//0img.hitv.com/preview/sp_images/2022/01/28/202201281528074643023.jpg'))
    from utils import parser
    # Load the shared js template prelude (everything before the first 'export').
    with open('../js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    js_path = f'js/360影视.js'
    # NOTE: js_code becomes a module-level name — playContent falls back to it
    # when the rule's lazy code is not an inline 'js:' string.
    ctx, js_code = parser.runJs(js_path, before=before)
    ruleDict = ctx.rule.to_dict()
    # ruleDict['id'] = rule  # route id would be injected here for play sniffing
    cms = CMS(ruleDict)
    print(cms.title)
    print(cms.homeContent())
    print(cms.searchContent('独行月球'))