cntv央视 46 KB

  1. # coding=utf-8
  2. # !/usr/bin/python
  3. import os.path
  4. import random
  5. import sys
  6. sys.path.append('..')
  7. try:
  8. # from base.spider import Spider as BaseSpider
  9. from base.spider import BaseSpider
  10. except ImportError:
  11. from t4.base.spider import BaseSpider
  12. import json
  13. import time
  14. import base64
  15. import datetime
  16. import re
  17. from urllib import request, parse
  18. from pathlib import Path
  19. import urllib
  20. import urllib.request
  21. """
  22. 配置示例:
  23. t4的配置里ext节点会自动变成api对应query参数extend,但t4的ext字符串不支持路径格式,比如./开头或者.json结尾
  24. api里会自动含有ext参数是base64编码后的选中的筛选条件
  25. 错误示例,ext含有json:
  26. {
  27. "key":"hipy_cntv央视",
  28. "name":"cntv央视(hipy_t4)",
  29. "type":4,
  30. "api":"央视?api_ext={{host}}/txt/hipy/cntv央视.json",
  31. "searchable":1,
  32. "quickSearch":1,
  33. "filterable":0,
  34. "ext":"cntv央视.json"
  35. }
  36. 正确示例。同时存在ext和api_ext会优先取ext作为extend加载init
  37. {
  38. "key":"hipy_t4_cntv央视",
  39. "name":"cntv央视(hipy_t4)",
  40. "type":4,
  41. "api":"央视?api_ext={{host}}/txt/hipy/cntv央视.json",
  42. "searchable":1,
  43. "quickSearch":0,
  44. "filterable":1,
  45. "ext":"{{host}}/files/hipy/cntv央视.json"
  46. },
  47. {
  48. "key": "hipy_t3_cntv央视",
  49. "name": "cntv央视(hipy_t3)",
  50. "type": 3,
  51. "api": "{{host}}/txt/hipy/cntv央视.py",
  52. "searchable": 1,
  53. "quickSearch": 0,
  54. "filterable": 1,
  55. "ext": "{{host}}/files/hipy/cntv央视.json"
  56. },
  57. """
  58. class Spider(BaseSpider): # 元类 默认的元类 type
  59. module = None
  def getDependence(self):
      """Return the list of module names this spider depends on."""
      dependencies = ['base_spider']
      return dependencies
  def getName(self):
      """Return the display name of this spider."""
      display_name = "中央电视台"  # searchable
      return display_name
  def init_api_ext_file(self):
      """Scrape filter options from two pages and write them to a JSON file.

      The first page yields the "特别节目" filters (channel, type, initial
      letter); the second yields the "纪录片" filters (channel, type, year,
      initial letter).  Each filter group is built from the ``<li>``
      children of a fixed element id, taking the label from the nested
      ``<span>`` text and the value from a per-group attribute.  The result
      is written next to this module, with the module's extension replaced
      by ``.json``.

      NOTE(review): both ``self.fetch('')`` calls carry an empty URL in this
      copy of the file; the page addresses must be restored before this
      method can return real data.
      """
      # Swap only the extension: str.replace('.py', '.json') would also
      # rewrite any other ".py" occurring earlier in the path.
      ext_file = os.path.splitext(__file__)[0] + '.json'
      print(f'ext_file:{ext_file}')

      def build_filter(page, list_id, value_attr, key, name):
          # One filter group from the <li> items under the given element id.
          options = [
              {
                  'n': ''.join(li.xpath('./span//text()')),
                  'v': ''.join(li.xpath('@' + value_attr)),
              }
              for li in page.xpath(f'//*[@id="{list_id}"]/li')
          ]
          return {"key": key, "name": name, "value": options}

      # (element id, value attribute, filter key, display name)
      channel = ('pindao', 'datacd', 'datapd-channel', '频道')
      genre = ('fenlei', 'datalx', 'datafl-sc', '类型')
      year = ('nianfen', 'datanf', 'datanf-year', '年份')
      letter = ('zimu', 'datazm', 'dataszm-letter', '首字母')

      # ---- special-programme filters ----
      page = self.html(self.fetch('').text)
      filter_tbjm = [build_filter(page, *spec) for spec in (channel, genre, letter)]
      print(filter_tbjm)

      # ---- documentary filters ----
      page = self.html(self.fetch('').text)
      filter_jlp = [build_filter(page, *spec) for spec in (channel, genre, year, letter)]
      print(filter_jlp)

      ext_file_dict = {
          "特别节目": filter_tbjm,
          "纪录片": filter_jlp,
      }
      with open(ext_file, mode='w', encoding='utf-8') as f:
          f.write(json.dumps(ext_file_dict, ensure_ascii=False))
  def init(self, extend=""):
      """Merge extra filter definitions into ``self.config`` and pick a helper module.

      ``self.extend`` (not the ``extend`` argument) selects the extra
      filters: a value starting with ``./`` is a JSON file relative to this
      module, a value starting with ``http`` is fetched with ``self.fetch``,
      and any other non-empty string ``name`` is read from ``./name.json``
      beside this module.  Failures while loading are printed and ignored.

      Afterwards the "栏目大全" filter list gets a ``year`` group (current
      year and the nine before it) and a ``month`` group (01-12) if they
      are not already present.

      :param extend: when it is a list, the first item whose type string
          contains ``.Spider`` is stored in ``self.module``.
      """

      def init_file(ext_file):
          # Merge a local JSON file of filters into the shared filter dict.
          ext_file = Path(ext_file).as_posix()
          if not os.path.exists(ext_file):
              return
          try:
              with open(ext_file, mode='r', encoding='utf-8') as f:
                  ext_dict = json.load(f)
              self.config['filter'].update(ext_dict)
          except Exception as e:
              print(f'更新扩展筛选条件发生错误:{e}')

      print("============依赖列表:{0}============".format(extend))
      ext = self.extend
      print("============ext:{0}============".format(ext))
      if isinstance(ext, str) and ext:
          base_dir = os.path.dirname(__file__)
          if ext.startswith('./'):
              init_file(os.path.join(base_dir, ext))
          elif ext.startswith('http'):
              try:
                  r = self.fetch(ext)
                  self.config['filter'].update(r.json())
              except Exception as e:
                  print(f'更新扩展筛选条件发生错误:{e}')
          else:
              init_file(os.path.join(base_dir, './' + ext + '.json'))

      # ---- add year / month filters to the programme directory ----
      lanmu_list = self.config['filter']['栏目大全']
      lanmu_keys_list = [lanmu['key'] for lanmu in lanmu_list]
      if 'year' not in lanmu_keys_list:
          currentYear = datetime.date.today().year
          # Values are strings, like every other filter option, so that
          # categoryContent can concatenate year and month.
          yearList = [{"n": "全部", "v": ""}]
          yearList.extend(
              {"n": str(year), "v": str(year)}
              for year in range(currentYear, currentYear - 10, -1)
          )
          lanmu_list.append({"key": "year", "name": "年份", "value": yearList})
      if 'month' not in lanmu_keys_list:
          monthList = [{"n": "全部", "v": ""}]
          for month in range(1, 13):
              text = str(month).rjust(2, '0')
              monthList.append({"n": text, "v": text})
          lanmu_list.append({"key": "month", "name": "月份", "value": monthList})

      # Load a helper module; the first match is enough.
      if isinstance(extend, list):
          for lib in extend:
              if '.Spider' in str(type(lib)):
                  self.module = lib
                  break
  def isVideoFormat(self, url):
      """Placeholder hook: performs no check and returns None."""
      return None
  def manualVideoCheck(self):
      """Placeholder hook: performs no check and returns None."""
      return None
  def homeContent(self, filter):
      """Return the category list, plus the filter definitions when requested.

      :param filter: when truthy, ``self.config['filter']`` is included
          under the ``filters`` key.
      """
      # Every category uses its display name as its id.
      category_names = (
          "4K专区",
          "栏目大全",
          "特别节目",
          "纪录片",
          "电视剧",
          "动画片",
          "频道直播",
      )
      result = {
          'class': [
              {'type_name': label, 'type_id': label}
              for label in category_names
          ],
      }
      if filter:
          result['filters'] = self.config['filter']
      return result
  def homeVideoContent(self):
      """Return the home-page video list, delegating to ``self.module`` if set."""
      if not self.module:
          # No helper module loaded: report an empty list.
          return {'list': []}
      return self.module.homeVideoContent()
  def categoryContent(self, tid, pg, filter, extend):
      """Fetch one page of a category listing and return it as a result dict.

      :param tid: category id, one of the names returned by homeContent.
      :param pg: page number, passed through to the request and the result.
      :param filter: unused here.
      :param extend: dict of selected filter values keyed by filter key.
      :return: dict with ``list``, ``page``, ``pagecount``, ``limit`` and
          ``total``.  ``pagecount`` is 9999 while a full page came back,
          otherwise the current page.

      NOTE(review): the request URLs in this copy of the file have lost
      their leading part (scheme, host, path); only the query tails remain
      and "频道直播" fetches an empty URL.  Restore them before use.
      """
      quote = urllib.parse.quote
      area = ''      # region
      channel = ''   # channel
      datafl = ''    # type
      letter = ''    # initial letter
      year = ''
      year_prefix = ''  # year+month filter for the programme directory
      pagecount = 24    # page size requested from the API

      if tid == '动画片':
          fc_name = quote(tid)
          area = quote(extend.get('datadq-area', ''))
          letter = extend.get('dataszm-letter', '')
          datafl = quote(extend.get('datafl-sc', ''))
          url = '{0}&sc={4}&fc={1}&letter={2}&p={3}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
              area, fc_name, letter, pg, datafl)
      elif tid == '纪录片':
          fc_name = quote(tid)
          channel = quote(extend.get('datapd-channel', ''))
          datafl = quote(extend.get('datafl-sc', ''))
          year = extend.get('datanf-year', '')
          letter = extend.get('dataszm-letter', '')
          url = '{0}&channel={1}&sc={2}&year={3}&letter={4}&p={5}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
              fc_name, channel, datafl, year, letter, pg)
      elif tid == '电视剧':
          fc_name = quote(tid)
          # The region filter is offered for this category in config, so
          # honour it here the same way the animation branch does.
          area = quote(extend.get('datadq-area', ''))
          datafl = quote(extend.get('datafl-sc', ''))
          year = extend.get('datanf-year', '')
          letter = extend.get('dataszm-letter', '')
          url = '{0}&sc={1}&fc={2}&year={3}&letter={4}&p={5}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
              area, datafl, fc_name, year, letter, pg)
      elif tid == '特别节目':
          fc_name = quote(tid)
          channel = quote(extend.get('datapd-channel', ''))
          datafl = quote(extend.get('datafl-sc', ''))
          letter = extend.get('dataszm-letter', '')
          url = '{0}&sc={1}&fc={2}&bigday=&letter={3}&p={4}&n=24&serviceId=tvcctv&topv=1&t=json'.format(
              channel, datafl, fc_name, letter, pg)
      elif tid == '栏目大全':
          cid = extend.get('cid', '')  # channel
          fc = extend.get('fc', '')    # category
          fl = extend.get('fl', '')    # initial letter
          year = extend.get('year') or ''
          month = extend.get('month') or ''
          if year:
              # Coerce to str: the year option may arrive as an int.
              year_prefix = str(year) + str(month)
          url = '{0}&fc={1}&cid={2}&p={3}&n=20&serviceId=tvcctv&t=json&cb=ko'.format(
              fl, fc, cid, pg)
          pagecount = 20
      elif tid == '4K专区':
          cid = 'CHAL1558416868484111'
          url = '{0}&p={1}&n={2}&t=json&cb=ko'.format(cid, pg, pagecount)
      else:
          # Covers "频道直播" and any unknown category.
          url = ''

      htmlText = self.fetch(url).text
      if tid in ('栏目大全', '4K专区'):
          # The response is JSONP: drop the leading "ko(" and trailing ");".
          index = htmlText.rfind(');')
          if index > -1:
              htmlText = htmlText[3:index]

      if tid == '栏目大全':
          videos = self.get_list1(html=htmlText, tid=tid, year_prefix=year_prefix)
      elif tid == '4K专区':
          videos = self.get_list_4k(html=htmlText, tid=tid)
      elif tid == '频道直播':
          videos = []
          html = self.html(htmlText)
          lis = html.xpath('//*[@id="jiemudan01"]//div[contains(@class,"channel_con")]//ul/li')
          for li in lis:
              vid = ''.join(li.xpath('./img/@title'))
              pic = ''.join(li.xpath('./img/@src'))
              pic = self.urljoin('', pic)
              videos.append({
                  'vod_id': '||'.join([tid, vid, f'{vid}/', pic]),
                  'vod_name': vid,
                  'vod_pic': pic,
                  'vod_mark': '',
              })
      else:
          videos = self.get_list(html=htmlText, tid=tid)

      return {
          'list': videos,
          'page': pg,
          'pagecount': 9999 if len(videos) >= pagecount else pg,
          'limit': 90,
          'total': 999999,
      }
  def detailContent(self, array):
      """Build the detail record (metadata plus episode list) for one item.

      ``array[0]`` is the ``vod_id`` produced by the list methods:
      ``tid||title||lastVideo||logo||id||year[||actors[||brief]]``,
      optionally prefixed with ``<year_prefix>$$$``.

      :return: ``{'list': [vod]}`` on success, ``{}`` when no episode
          could be found.  Live channels ("频道直播") return a one-entry
          play list without any request.

      NOTE(review): the API URLs in this copy of the file have lost their
      leading part; only the query tails remain.  Restore them before use.
      """
      year_prefix = ''
      did = array[0]
      if '$$$' in did:
          parts = did.split('$$$')
          year_prefix, did = parts[0], parts[1]
      aid = did.split('||')
      tid, title, lastVideo, logo = aid[0], aid[1], aid[2], aid[3]

      if tid == '频道直播':
          vod = {
              "vod_id": did,
              "vod_name": title.replace(' ', ''),
              "vod_pic": logo,
              "vod_content": f'频道{title}正在直播中',
              "vod_play_from": '道长在线直播',
              "vod_play_url": f'在线观看${title}||{lastVideo}',
          }
          return {'list': [vod]}

      vid = aid[4]
      vod_year = aid[5]
      actors = aid[6] if len(aid) > 6 else ''
      # A GET request is capped at about 255 chars, so the description may be missing.
      brief = aid[7] if len(aid) > 7 else ''
      fromId = 'CCTV'

      if tid == "栏目大全":
          lastUrl = '{0}&serviceId=tvcctv'.format(vid)
          htmlTxt = self.fetch(lastUrl).text
          topicId = json.loads(htmlTxt)['ctid']
          url = ''
          Url = "{0}?id={1}&d=&p=1&n=100&sort=desc&mode=0&serviceId=tvcctv&t=json&d={2}".format(
              url, topicId, year_prefix)
      elif tid == "4K专区":
          Url = '{0}&serviceId=cctv4k&p=1&n=100&mode=0&pub=1'.format(vid)
          print(Url)
      else:
          Url = '{0}&serviceId=tvcctv&p=1&n=100&mode=0&pub=1'.format(vid)

      # Fallback patterns for scraping episodes out of the item's web page.
      page_patterns = {
          "电视剧": r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'",
          "特别节目": r'class="tp1"><a\s*href="(?P<url>https://.+?)"\s*target="_blank"\s*title="(?P<title>.+?)"></a></div>',
          "动画片": r"'title':\s*'(?P<title>.+?)',\n{0,1}\s*'img':\s*'(.+?)',\n{0,1}\s*'brief':\s*'(.+?)',\n{0,1}\s*'url':\s*'(?P<url>.+?)'",
          "栏目大全": r'href="(?P<url>.+?)" target="_blank" alt="(?P<title>.+?)" title=".+?">',
      }
      page_patterns["纪录片"] = page_patterns["电视剧"]
      page_patterns["4K专区"] = page_patterns["电视剧"]

      videoList = []
      try:
          if tid == "搜索":
              # Search hits are single videos: play the hit's own URL.
              fromId = '中央台'
              videoList = [title + "$" + lastVideo]
          else:
              htmlTxt = self.fetch(Url).text
              jsonList = json.loads(htmlTxt)['data']['list']
              videoList = self.get_EpisodesList(jsonList=jsonList)
              patternTxt = page_patterns.get(tid)
              if len(videoList) < 1 and patternTxt is not None:
                  # The API returned nothing: scrape the item's page instead.
                  htmlTxt = self.fetch(lastVideo).text
                  videoList = self.get_EpisodesList_re(htmlTxt=htmlTxt, patternTxt=patternTxt)
                  fromId = '央视'
      except Exception as e:
          # Best effort: report the failure and fall through to the empty result.
          print(f'获取剧集列表发生错误:{e}')

      if len(videoList) == 0:
          return {}
      vod = {
          "vod_id": did,
          "vod_name": title.replace(' ', ''),
          "vod_pic": logo,
          "type_name": tid,
          "vod_year": vod_year,
          "vod_area": "",
          "vod_remarks": '',
          "vod_actor": actors,
          "vod_director": '',
          "vod_content": brief,
          "vod_play_from": fromId,
          "vod_play_url": "#".join(videoList),
      }
      return {'list': [vod]}
  def get_lineList(self, Txt, mark, after):
      """Collect the slices of ``Txt`` running from each ``mark`` to the next ``after``.

      NOTE(review): a method of the same name is defined again further down
      in this class; the later definition is the one that ends up bound.
      NOTE(review): scanning only continues while the match offset is
      greater than 8, so a ``mark`` within the first nine characters ends
      the scan; confirm whether that threshold is intended.
      """
      segments = []
      start = Txt.find(mark)
      while start > 8:
          stop = Txt.find(after, start)
          segments.append(Txt[start:stop])
          start = Txt.find(mark, stop)
      return segments
  def get_RegexGetTextLine(self, Text, RegexText, Index):
      """Return every match of ``RegexText`` in ``Text`` as a list.

      The pattern is compiled with ``re.M | re.S``; ``Index`` is accepted
      but not used.

      NOTE(review): a method of the same name is defined again further down
      in this class; the later definition is the one that ends up bound.
      """
      matches = re.compile(RegexText, re.M | re.S).findall(Text)
      return list(matches)
  def searchContent(self, key, quick, pg=1):
      """Search for ``key`` and return the hits as ``{'list': [...]}``.

      ``quick`` and ``pg`` are accepted but not used.

      NOTE(review): the search URL in this copy of the file has lost its
      leading part; only the query tail remains.
      """
      quoted_key = urllib.parse.quote(key)
      search_url = '{0}&sort=relevance&pageSize=20&type=video&vtime=-1&datepid=1&channel=&pageflag=0&qtext_str={0}'.format(
          quoted_key)
      response_text = self.fetch(search_url).text
      return {'list': self.get_list_search(html=response_text, tid='搜索')}
  def playerContent(self, flag, id, vipFlags):
      """Resolve a play entry to a stream URL.

      :param flag: play source name chosen in detailContent ("CCTV",
          "道长在线直播", or anything else for a page URL).
      :param id: the play entry: a video guid for "CCTV",
          ``title||url`` for live channels, otherwise a page URL.
      :param vipFlags: unused.
      :return: dict with ``parse`` (0 = play directly, 1 = let the client
          sniff the page), ``playUrl``, ``url`` and ``header``.

      NOTE(review): the live-channel request URL in this copy of the file
      has lost its leading part.
      """
      headers = {
          'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'
      }
      url = ''
      parse = 0
      if flag == 'CCTV':
          url = self.get_m3u8(urlTxt=id)
      elif flag == '道长在线直播':
          title = id.split('||')[0]  # channel title
          _url = f'{title}&channel_id={title}'
          htmlTxt = self.fetch(_url).text
          vdata = self.regStr(htmlTxt, "var .*?=.*?'(.*?)';")
          vdata = self.str2json(vdata)
          print(vdata)
          url = vdata['hls_url']['hls1']
          print(url)
          url = self.fixm3u8_url(url)
      else:
          try:
              htmlTxt = self.fetch(id).text
              guid = self.get_RegexGetText(Text=htmlTxt, RegexText=r'var\sguid\s*=\s*"(.+?)";', Index=1)
              url = self.get_m3u8(urlTxt=guid)
          except Exception as e:
              # Could not resolve a stream: hand the page itself to the client.
              print(f'解析播放地址发生错误:{e}')
              url = id
              parse = 1
      if 'https:' not in url:
          # No usable stream URL was produced; fall back to sniffing.
          url = id
          parse = 1
      return {
          "parse": parse,  # 1 = sniff, 0 = play
          "playUrl": '',
          "url": url,
          "header": headers,
      }
  548. # 分类抓取地址:
  549. # 栏目大全:
  550. # 电视剧:
  551. # 动画片:
  552. # 记录片:
  553. # 特别节目:
  # Static spider configuration.  "filter" maps a category name to its
  # filter groups; every option is {"n": label, "v": value}.  Where label
  # and value coincide, the options are generated from the label list.
  config = {
      "player": {},
      "filter": {
          "电视剧": [
              {
                  "key": "datafl-sc",
                  "name": "类型",
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": label, "v": label}
                      for label in (
                          "谍战", "悬疑", "刑侦", "历史", "古装", "武侠",
                          "军旅", "战争", "喜剧", "青春", "言情", "偶像",
                          "家庭", "年代", "革命", "农村", "都市", "其他",
                      )
                  ],
              },
              {
                  "key": "datadq-area",
                  "name": "地区",
                  "value": [
                      {"n": label, "v": value}
                      for label, value in (
                          ("全部", ""),
                          ("中国大陆", "中国大陆"),
                          ("中国香港", "香港"),
                          ("美国", "美国"),
                          ("欧洲", "欧洲"),
                          ("泰国", "泰国"),
                      )
                  ],
              },
              {
                  "key": "datanf-year",
                  "name": "年份",
                  # 2024 down to 1997
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": str(y), "v": str(y)} for y in range(2024, 1996, -1)
                  ],
              },
              {
                  "key": "dataszm-letter",
                  "name": "字母",
                  # B and D are absent from this list.
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": ch, "v": ch}
                      for ch in [*"ACEFGHIJKLMNOPQRSTUVWXYZ", "0-9"]
                  ],
              },
          ],
          "动画片": [
              {
                  "key": "datafl-sc",
                  "name": "类型",
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": label, "v": label}
                      for label in (
                          "亲子", "搞笑", "冒险", "动作", "宠物", "体育",
                          "益智", "历史", "教育", "校园", "言情", "武侠",
                          "经典", "未来", "古代", "神话", "真人", "励志",
                          "热血", "奇幻", "童话", "剧情", "夺宝", "其他",
                      )
                  ],
              },
              {
                  "key": "datadq-area",
                  "name": "地区",
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": label, "v": label}
                      for label in ("中国大陆", "美国", "欧洲")
                  ],
              },
              {
                  "key": "dataszm-letter",
                  "name": "字母",
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": ch, "v": ch}
                      for ch in [*"ACEFGHIJKLMNOPQRSTUVWXYZ", "0-9"]
                  ],
              },
          ],
          "纪录片": [
              {
                  "key": "datafl-sc",
                  "name": "类型",
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": label, "v": label}
                      for label in (
                          "人文历史", "人物", "军事", "探索",
                          "社会", "时政", "经济", "科技",
                      )
                  ],
              },
              {
                  "key": "datanf-year",
                  "name": "年份",
                  # 2024 down to 2008
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": str(y), "v": str(y)} for y in range(2024, 2007, -1)
                  ],
              },
              {
                  "key": "dataszm-letter",
                  "name": "字母",
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": ch, "v": ch}
                      for ch in [*"ACEFGHIJKLMNOPQRSTUVWXYZ", "0-9"]
                  ],
              },
          ],
          "特别节目": [
              {
                  "key": "datafl-sc",
                  "name": "类型",
                  # "全部" appears twice: once with an empty value and once
                  # with the literal value "全部".
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": label, "v": label}
                      for label in (
                          "全部", "新闻", "经济", "综艺", "体育", "军事",
                          "影视", "科教", "戏曲", "青少", "音乐", "社会",
                          "公益", "其他",
                      )
                  ],
              },
              {
                  "key": "dataszm-letter",
                  "name": "字母",
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": ch, "v": ch}
                      for ch in [*"ACEFGHIJKLMNOPQRSTUVWXYZ", "0-9"]
                  ],
              },
          ],
          "栏目大全": [
              {
                  "key": "cid",
                  "name": "频道",
                  "value": [
                      {"n": label, "v": value}
                      for label, value in (
                          ("全部", ""),
                          ("CCTV-1综合", "EPGC1386744804340101"),
                          ("CCTV-2财经", "EPGC1386744804340102"),
                          ("CCTV-3综艺", "EPGC1386744804340103"),
                          ("CCTV-4中文国际", "EPGC1386744804340104"),
                          ("CCTV-5体育", "EPGC1386744804340107"),
                          ("CCTV-6电影", "EPGC1386744804340108"),
                          ("CCTV-7国防军事", "EPGC1386744804340109"),
                          ("CCTV-8电视剧", "EPGC1386744804340110"),
                          ("CCTV-9纪录", "EPGC1386744804340112"),
                          ("CCTV-10科教", "EPGC1386744804340113"),
                          ("CCTV-11戏曲", "EPGC1386744804340114"),
                          ("CCTV-12社会与法", "EPGC1386744804340115"),
                          ("CCTV-13新闻", "EPGC1386744804340116"),
                          ("CCTV-14少儿", "EPGC1386744804340117"),
                          ("CCTV-15音乐", "EPGC1386744804340118"),
                          ("CCTV-16奥林匹克", "EPGC1634630207058998"),
                          ("CCTV-17农业农村", "EPGC1563932742616872"),
                          ("CCTV-5+体育赛事", "EPGC1468294755566101"),
                      )
                  ],
              },
              {
                  "key": "fc",
                  "name": "分类",
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": label, "v": label}
                      for label in (
                          "新闻", "体育", "综艺", "健康", "生活", "科教",
                          "经济", "农业", "法治", "军事", "少儿", "动画",
                          "纪实", "戏曲", "音乐", "影视",
                      )
                  ],
              },
              {
                  "key": "fl",
                  "name": "字母",
                  "value": [{"n": "全部", "v": ""}] + [
                      {"n": ch, "v": ch} for ch in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                  ],
              },
          ],
      },
  }
  # Default request headers.
  # NOTE(review): Host and Referer are empty in this copy of the file.
  header = {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
      "Host": "",
      "Referer": "",
  }
  def localProxy(self, params):
      """Return a fixed ``[status, content type, body]`` triple; ``params`` is ignored."""
      status, content_type, body = 200, "video/MP2T", ""
      return [status, content_type, body]
  697. # -----------------------------------------------自定义函数-----------------------------------------------
  698. # 访问网页
  def webReadFile(self, urlStr, header):
      """Fetch ``urlStr`` with urllib and return the body decoded as UTF-8.

      :param urlStr: address to open.
      :param header: accepted for call compatibility; it is not sent.
      :raises urllib.error.URLError: when the request fails.
      """
      req = urllib.request.Request(url=urlStr)  # headers deliberately not passed
      with urllib.request.urlopen(req) as response:
          return response.read().decode('utf-8')
  705. # 判断网络地址是否存在
  def TestWebPage(self, urlStr, header):
      """Send a HEAD request to ``urlStr`` and return the HTTP status code.

      ``urlopen`` raises ``HTTPError`` for 4xx/5xx responses; that error is
      turned into its status code so callers that compare the result with
      200 reach their "not available" branch instead of crashing.
      Connection-level failures (``URLError``) still propagate.

      :param urlStr: address to probe.
      :param header: accepted for call compatibility; it is not sent.
      """
      from urllib.error import HTTPError

      req = urllib.request.Request(url=urlStr, method='HEAD')  # headers deliberately not passed
      try:
          with urllib.request.urlopen(req) as response:
              return response.getcode()
      except HTTPError as e:
          return e.code
  712. # 正则取文本
  def get_RegexGetText(self, Text, RegexText, Index):
      """Return group ``Index`` of the first match of ``RegexText`` in ``Text``.

      The search uses ``re.M | re.S``.  An empty string is returned when
      nothing matches.
      """
      found = re.search(RegexText, Text, re.M | re.S)
      if found is None:
          return ""
      return found.group(Index)
  721. # 取集数
  def get_EpisodesList(self, jsonList):
      """Turn API episode records into ``title$guid`` play entries.

      Records whose ``guid`` is empty are skipped.
      """
      entries = []
      for record in jsonList:
          guid, label = record['guid'], record['title']
          if len(guid) != 0:
              entries.append(label + "$" + guid)
      return entries
  731. # 取集数
  def get_EpisodesList_re(self, htmlTxt, patternTxt):
      """Extract ``title$url`` play entries from page text with a regex.

      :param htmlTxt: text to scan.
      :param patternTxt: pattern with named groups ``url`` and ``title``;
          it is applied with ``re.M | re.S``.
      :return: one entry per match; matches with an empty ``url`` are skipped.
      """
      videos = []
      for vod in re.finditer(patternTxt, htmlTxt, re.M | re.S):
          url = vod.group('url')
          title = vod.group('title')
          if len(url) == 0:
              continue
          videos.append(title + "$" + url)
      return videos
  742. # 取剧集区
  def get_lineList(self, Txt, mark, after):
      """Collect the slices of ``Txt`` running from each ``mark`` to the next ``after``.

      NOTE(review): scanning only continues while the match offset is
      greater than 8, so a ``mark`` within the first nine characters ends
      the scan; confirm whether that threshold is intended.
      """
      pieces = []
      position = Txt.find(mark)
      while True:
          if position <= 8:
              break
          closing = Txt.find(after, position)
          pieces.append(Txt[position:closing])
          position = Txt.find(mark, closing)
      return pieces
  751. # 正则取文本,返回数组
  def get_RegexGetTextLine(self, Text, RegexText, Index):
      """Return every match of ``RegexText`` in ``Text`` as a list.

      Matching uses ``re.M | re.S``; ``Index`` is accepted but not used.
      """
      return [hit for hit in re.findall(RegexText, Text, re.M | re.S)]
  761. # 删除html标签
  def removeHtml(self, txt):
      """Strip HTML tags from ``txt`` and turn ``&nbsp;`` into a plain space."""
      without_tags = re.sub(r'<[^>]+>', '', txt, flags=re.S)
      return without_tags.replace("&nbsp;", " ")
  def hookM3u8(self, url):
      """Apply one randomly chosen rewrite to an m3u8 URL and return it.

      The candidates are: double the slash after the first ``asp/``,
      double the slash after the first ``hls/``, or a third replacement
      whose search and replacement strings are both empty in this copy of
      the file (so it leaves the URL unchanged).  A falsy ``url`` is
      treated as an empty string.

      NOTE(review): the third rule looks like it lost its arguments in
      extraction; restore them from the upstream file.
      """
      url = url or ''
      rewrites = [
          ('asp/', 'asp//'),
          ('hls/', 'hls//'),
          ('', ''),
      ]
      old, new = random.choice(rewrites)
      return url.replace(old, new, 1)
  780. # 取m3u8
  def get_m3u8(self, urlTxt):
      """Resolve a video id to the URL of its highest-listed m3u8 variant.

      Steps: fetch the video info JSON and read ``hls_url``; download the
      playlist it points to; take the stream name from the playlist's last
      line; substitute that name for ``main`` in the link; probe the result
      with a HEAD request.  Returns the rewritten URL (query string removed,
      passed through hookM3u8) when the probe answers 200, otherwise ``''``.

      NOTE(review): the info-API address and the host replacements are
      empty strings in this copy of the file; restore them before use.
      NOTE(review): the character class ``[a-zA-z0-9.]`` in the host
      pattern spans ``A-z``, which also admits a few punctuation
      characters, and does not admit ``-``.
      """
      info_url = "{0}".format(urlTxt)
      info = json.loads(self.fetch(info_url).text)
      link = info['hls_url'].strip()
      # Scheme and host of the link.
      urlPrefix = self.get_RegexGetText(Text=link, RegexText='(http[s]?://[a-zA-z0-9.]+)/', Index=1)
      # Rewriting the prefix exposes the playlist that lists all variants.
      playlist_url = link.replace(f'{urlPrefix}/asp/hls/', '').split('?')[0]
      playlist = self.webReadFile(urlStr=playlist_url, header=self.header).strip()
      last_entry = playlist.split('\n')[-1]
      maxVideo = last_entry.split('/')[-1].replace('.m3u8', '')
      hdUrl = link.replace('main', maxVideo).replace(urlPrefix, '')
      if self.TestWebPage(urlStr=hdUrl, header=self.header) != 200:
          return ''
      url = self.hookM3u8(hdUrl.split('?')[0])
      self.log(f'视频链接: {url}')
      return url
  def fixm3u8_url(self, url):
      """Follow a master playlist to its first listed variant URL.

      The playlist at ``url`` (query string removed) is downloaded; the
      variant path is taken from its fourth line when the text contains
      ``EXT-X-VERSION`` and from its third line otherwise.  The path is
      resolved against the playlist URL and probed with a HEAD request.
      Returns the variant URL when the probe answers 200, otherwise ``''``.
      """
      # The host prefix is computed but not used further.
      _prefix = self.get_RegexGetText(Text=url, RegexText='(http[s]?://[a-zA-z0-9.]+)/', Index=1)
      new_link = url.split('?')[0]
      content = self.webReadFile(urlStr=new_link, header=self.header).strip()
      rows = content.split('\n')
      variant_row = 3 if 'EXT-X-VERSION' in content else 2
      hdUrl = self.urljoin(new_link, rows[variant_row]).split('?')[0]
      if self.TestWebPage(urlStr=hdUrl, header=self.header) != 200:
          return ''
      self.log(f'视频链接: {hdUrl}')
      return hdUrl
  840. # 搜索
  841. def get_list_search(self, html, tid):
  842. jRoot = json.loads(html)
  843. jsonList = jRoot['list']
  844. videos = []
  845. for vod in jsonList:
  846. url = vod['urllink']
  847. title = self.removeHtml(txt=vod['title'])
  848. img = vod['imglink']
  849. id = vod['id']
  850. brief = vod['channel']
  851. year = vod['uploadtime']
  852. if len(url) == 0:
  853. continue
  854. guids = [tid, title, url, img, id, year, '', brief]
  855. guid = "||".join(guids)
  856. videos.append({
  857. "vod_id": guid,
  858. "vod_name": title,
  859. "vod_pic": img,
  860. "vod_remarks": year
  861. })
  862. return videos
  863. def get_list1(self, html, tid, year_prefix=None):
  864. jRoot = json.loads(html)
  865. videos = []
  866. data = jRoot['response']
  867. if data is None:
  868. return []
  869. jsonList = data['docs']
  870. for vod in jsonList:
  871. id = vod['lastVIDE']['videoSharedCode']
  872. desc = vod['lastVIDE']['videoTitle']
  873. title = vod['column_name']
  874. url = vod['column_website']
  875. img = vod['column_logo']
  876. year = vod['column_playdate']
  877. brief = vod['column_brief']
  878. actors = ''
  879. if len(url) == 0:
  880. continue
  881. guids = [tid, title, url, img, id, year, actors, brief]
  882. guid = "||".join(guids)
  883. # print(vod_id)
  884. videos.append({
  885. "vod_id": year_prefix + '$$$' + guid if year_prefix else guid,
  886. "vod_name": title,
  887. "vod_pic": img,
  888. "vod_remarks": desc.split('》')[1].strip() if '》' in desc else desc.strip()
  889. })
  890. # print(videos)
  891. return videos
  892. # 分类取结果
  893. def get_list(self, html, tid):
  894. jRoot = json.loads(html)
  895. videos = []
  896. data = jRoot['data']
  897. if data is None:
  898. return []
  899. jsonList = data['list']
  900. for vod in jsonList:
  901. url = vod['url']
  902. title = vod['title']
  903. img = vod['image']
  904. id = vod['id']
  905. try:
  906. brief = vod['brief']
  907. except:
  908. brief = ''
  909. try:
  910. year = vod['year']
  911. except:
  912. year = ''
  913. try:
  914. actors = vod['actors']
  915. except:
  916. actors = ''
  917. if len(url) == 0:
  918. continue
  919. guids = [tid, title, url, img, id, year, actors, brief]
  920. guid = "||".join(guids)
  921. # print(vod_id)
  922. videos.append({
  923. "vod_id": guid,
  924. "vod_name": title,
  925. "vod_pic": img,
  926. "vod_remarks": ''
  927. })
  928. return videos
  929. # 4k分类取结果
  930. def get_list_4k(self, html, tid):
  931. jRoot = json.loads(html)
  932. videos = []
  933. data = jRoot['data']
  934. if data is None:
  935. return []
  936. jsonList = data['list']
  937. for vod in jsonList:
  938. vod_remarks = vod['title']
  939. id = vod['id']
  940. vod = vod['last_video']
  941. img = vod['image']
  942. url = vod['url']
  943. title = vod['title']
  944. brief = vod.get('brief') or ''
  945. year = vod.get('year') or ''
  946. actors = vod.get('actors') or ''
  947. if len(url) == 0:
  948. continue
  949. guids = [tid, title, url, img, id, year, actors, brief]
  950. guid = "||".join(guids)
  951. # print(vod_id)
  952. videos.append({
  953. "vod_id": guid,
  954. "vod_name": title,
  955. "vod_pic": img,
  956. "vod_remarks": vod_remarks
  957. })
  958. return videos
  959. if __name__ == '__main__':
  960. from t4.core.loader import t4_spider_init
  961. spider = Spider()
  962. t4_spider_init(spider)
  963. # print(spider.homeContent(True))
  964. # print(spider.homeVideoContent())
  965. # spider.init_api_ext_file()
  966. # url = ''
  967. # url = ''
  968. # r = spider.fetch(url)
  969. # print(r.text)
  970. # home_content = spider.homeContent(None)
  971. # print(home_content)
  972. cate_content = spider.categoryContent('栏目大全', 1, {'cid': 'n'}, {})
  973. # cate_content = spider.categoryContent('频道直播', 1, None, None)
  974. print(cate_content)
  975. vid = cate_content['list'][0]['vod_id']
  976. print(vid)
  977. detail_content = spider.detailContent([vid])
  978. print(detail_content)
  979. # #
  980. vod_play_from = detail_content['list'][0]['vod_play_from']
  981. vod_play_url = detail_content['list'][0]['vod_play_url']
  982. print(vod_play_from, vod_play_url)
  983. _url = vod_play_url.split('#')[0].split('$')[1]
  984. print(_url)
  985. print('vod_play_from:', vod_play_from, ' vod_play_url:', _url)
  986. play = spider.playerContent(vod_play_from, _url, None)
  987. print(play)
  988. # play = spider.playerContent('道长在线直播', 'cctv1||', None)
  989. # print(play)