# yangyi1975/mi
# mirror of https://github.com/yangyi1975/m.git
							#coding=utf-8
#!/usr/bin/python
import sys
sys.path.append('..') 
from base.spider import Spider
import time
import re
from urllib import request, parse
import urllib
import urllib.request
from xml.etree.ElementTree import fromstring, ElementTree as et

class Spider(Spider):  # 元类 默认的元类 type
	def getName(self=None):
		"""Return the display name of this source.

		``self`` defaults to ``None`` so that both ``Spider.getName()`` (the
		only call form the original zero-argument signature allowed) and the
		usual ``instance.getName()`` work.
		"""
		# Original note: content unsuitable for children is excluded.
		return "新浪资源"
	# Content-filter switch: while it is True, detailContent and custom_list
	# drop entries whose type name matches 伦理 / 倫理 / 福利.
	filterate=True
	def init(self,extend=""):
		"""Print ``extend`` framed by ``=`` rulers; nothing is stored."""
		template = "============{0}============"
		print(template.format(extend))
	def isVideoFormat(self,url):
		"""Placeholder hook: inspects nothing and always returns ``None``."""
		return None
	def manualVideoCheck(self):
		"""Placeholder hook: does nothing and always returns ``None``."""
		return None
	def homeContent(self,filter):
		"""Build the home-page category list.

		Args:
			filter: when truthy, the class-level ``config['filter']`` mapping
				is added to the result under the ``filters`` key.

		Returns:
			A dict with ``class`` - a list of ``{'type_name', 'type_id'}``
			entries in declaration order - and, optionally, ``filters``.
		"""
		# Display name -> category id used in the ``t=`` query parameter.
		# An earlier, disabled variant added '伦理片' (id '22') late at night;
		# that category stays excluded here.
		cateManual = {
			'动漫':'3',
			'动漫电影':'17',
			'综艺':'4',
			'纪录片':'5',
			'动作片':'6',
			'爱情片':'7',
			'科幻片':'8',
			'战争片':'9',
			'剧情片':'10',
			'恐怖片':'11',
			'喜剧片':'12',
			'大陆剧':'13',
			'港澳剧':'14',
			'台湾剧':'15',
			'欧美剧':'16',
			'韩剧':'18',
			'日剧':'20',
			'泰剧':'21',
			'体育':'23'
		}
		result = {
			'class': [
				{'type_name': name, 'type_id': typeId}
				for name, typeId in cateManual.items()
			]
		}
		if filter:
			result['filters'] = self.config['filter']
		return result
	def homeVideoContent(self):
		"""Fetch the ``h=24`` listing feed and return its videos under ``list``.

		NOTE(review): ``h=24`` presumably restricts the feed to the last 24
		hours - confirm against the API.
		"""
		feedUrl = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&h=24'
		feedRoot = fromstring(self.custom_webReadFile(urlStr=feedUrl))
		# Iterating from the parsed root yields the same <list> nodes as going
		# through an ElementTree wrapper first.
		return {'list': self.custom_list(html=feedRoot.iter('list'))}
	def categoryContent(self,tid,pg,filter,extend):
		"""Return one page of videos for category ``tid``.

		Args:
			tid: category id placed in the ``t=`` query parameter.
			pg: page number placed in the ``pg=`` query parameter; echoed back
				unchanged as ``page``.
			filter: accepted but not used.
			extend: accepted but not used.

		Returns:
			A dict with ``list``, ``page``, ``pagecount``, ``limit`` and
			``total``. The three paging values are always ints: they are read
			from the ``pagecount`` / ``pagesize`` / ``recordcount`` attributes
			of the ``<list>`` node and fall back to 1 / 20 / 9999 when an
			attribute is missing or not numeric (previously they were strings
			when present and a missing attribute raised KeyError).
		"""
		Url='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&t={0}&pg={1}'.format(tid,pg)
		root = fromstring(self.custom_webReadFile(urlStr=Url))
		listNodes = list(root.iter('list'))
		# XML attribute name -> fallback value.
		paging = {'pagecount': 1, 'pagesize': 20, 'recordcount': 9999}
		for node in listNodes:
			for attrName in paging:
				try:
					paging[attrName] = int(node.attrib[attrName])
				except (KeyError, ValueError):
					# Keep the value gathered so far.
					pass
		return {
			'list': self.custom_list(html=listNodes),
			'page': pg,
			'pagecount': paging['pagecount'],
			'limit': paging['pagesize'],
			'total': paging['recordcount']
		}
	def detailContent(self,array):
		"""Return the detail record (metadata plus episode list) of one video.

		Args:
			array: sequence whose first item is a ``title###id###picture``
				string in the format built by ``custom_list``.

		Returns:
			``{'list': [vod]}``. ``{'list': []}`` is returned when the id
			string does not have three ``###`` parts, or when filtering is on
			and the type name matches 伦理 / 倫理 / 福利.

		Fetching and parsing are best effort: on any error the record is still
		returned with whatever fields were gathered before the failure.
		"""
		parts = array[0].split('###')
		if len(parts) < 3:
			# Malformed id: there is nothing to look up.
			return {'list': []}
		title, vodId, logo = parts[0], parts[1], parts[2]
		# XML tag name -> key of the returned record.
		tagToField = {
			'actor': 'vod_actor',
			'director': 'vod_director',
			'des': 'vod_content',
			'area': 'vod_area',
			'year': 'vod_year',
			'type': 'type_name',
			'lang': 'vod_remarks'
		}
		vod = {
			"vod_id":array[0],
			"vod_name":title,
			"vod_pic":logo,
			"type_name":'',
			"vod_year":'',
			"vod_area":'',
			"vod_remarks":'',
			"vod_actor":'',
			"vod_director":'',
			"vod_content":''
		}
		vod_play_from=['播放线路',]
		vod_play_url=[]
		try:
			url='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids='+vodId
			xmlTxt=self.custom_webReadFile(urlStr=url)
			for listNode in fromstring(xmlTxt).iter('list'):
				for video in listNode:
					for field in video:
						key = tagToField.get(field.tag)
						if key is not None:
							# An empty tag has text None; keep fields as str so
							# the filter regex below cannot fail on None.
							vod[key] = field.text or ''
			# The episode list is taken from the raw text of the xlyun <dd>.
			temporary=self.custom_RegexGetText(Text=xmlTxt,RegexText=r'<dd flag="xlyun">(.+?)</dd>',Index=1)
			temporary=temporary.replace('<![CDATA[','').replace(']]>','')
			vod_play_url.append("#".join(self.custom_EpisodesList(temporary)))
		except Exception:
			# Deliberate best effort (network, XML or episode-format errors);
			# unlike a bare ``except`` this lets KeyboardInterrupt/SystemExit
			# propagate.
			pass
		vod['vod_play_from'] = "$$$".join(vod_play_from)
		vod['vod_play_url'] = "$$$".join(vod_play_url)
		if self.filterate and self.custom_RegexGetText(Text=vod['type_name'],RegexText=r'(伦理|倫理|福利)',Index=1)!='':
			return {'list':[]}
		return {'list':[vod]}

	def searchContent(self,key,quick):
		"""Search the listing feed for ``key`` and return page 1 of the results.

		Args:
			key: search keyword; it is percent-encoded before being placed in
				the ``wd=`` query parameter.
			quick: accepted but not used.

		Returns:
			``{'list': videos}`` as produced by ``custom_list``.
		"""
		encodedKey = urllib.parse.quote(key)
		Url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&wd={0}&pg={1}'.format(encodedKey,'1')
		feedRoot = fromstring(self.custom_webReadFile(urlStr=Url))
		matches = self.custom_list(html=feedRoot.iter('list'))
		return {'list': matches}
	def playerContent(self,flag,id,vipFlags):
		"""Resolve the play address for one episode.

		The page at ``id`` is downloaded and searched for the first
		``http(s)://...m3u8`` address. When one is found it is returned with
		``parse`` set to 1; otherwise ``id`` itself is returned with ``parse``
		set to 0.

		Args:
			flag: accepted but not used.
			id: episode address to download.
			vipFlags: accepted but not used.

		Returns:
			A dict with ``parse``, ``playUrl``, ``url``, ``jx`` and ``header``.
		"""
		pageText = self.custom_webReadFile(urlStr=id,header=self.header)
		found = self.custom_RegexGetText(Text=pageText,RegexText=r'(https{0,1}://.+?\.m3u8)',Index=1)
		if found.find('.m3u8') < 1:
			# Nothing usable was extracted: hand back the original address.
			playAddress, parseFlag = id, 0
		else:
			playAddress, parseFlag = found, 1
		return {
			"parse": parseFlag,  # per the original note: 0 = play directly, 1 = sniff
			"playUrl": '',
			"url": playAddress,
			'jx': 0,  # per the original note: VIP parsing, 0 = off, 1 = on
			"header": ''
		}


	# Static configuration. homeContent returns config['filter'] under the
	# 'filters' key when filtering is requested; 'player' is not read anywhere
	# in this class.
	config = {
		"player": {},
		"filter": {}
		}
	# Request headers that playerContent passes to custom_webReadFile. Because
	# this is an (empty) dict rather than None, the default Referer /
	# User-Agent / Host headers are not applied on that call.
	header = {}
	def localProxy(self,param):
		"""Return a fixed ``[status, content type, action, body]`` proxy reply.

		Args:
			param: accepted but not used.
		"""
		# The original returned the undefined name ``action`` and therefore
		# raised NameError on every call.
		# NOTE(review): the fields below follow the usual template for this
		# kind of spider plugin - confirm against the host application.
		action = {
			'url':'',
			'header':'',
			'param':'',
			'type':'string',
			'after':''
		}
		return [200, "video/MP2T", action, ""]
	#-----------------------------------------------自定义函数-----------------------------------------------
		#正则取文本
	def custom_RegexGetText(self,Text,RegexText,Index):
		"""Return capture group ``Index`` of the first match of ``RegexText``.

		The search uses ``re.M | re.S``.

		Args:
			Text: text to search; ``None`` is treated as "no match" (it used
				to raise TypeError, e.g. for the text of an empty XML tag).
			RegexText: regular-expression pattern.
			Index: group number (or name) to return.

		Returns:
			The captured text, or ``""`` when there is no match or the group
			did not take part in the match.

		Raises:
			IndexError: if the pattern has no group ``Index``.
		"""
		if Text is None:
			return ""
		found = re.search(RegexText, Text, re.M|re.S)
		if found is None:
			return ""
		captured = found.group(Index)
		# An optional group that did not participate yields None.
		return "" if captured is None else captured
	#分类取结果
	def custom_list(self,html):
		"""Turn the ``<list>`` nodes of a listing feed into video summaries.

		Works in two passes: the ids found in the listing are collected, then
		one ``ac=detail`` request is made for all of them and each returned
		video is converted into a summary dict.

		Args:
			html: iterable of ``<list>`` elements whose children are the video
				entries.

		Returns:
			A list of dicts with ``vod_id`` (``title###id###picture``),
			``vod_name``, ``vod_pic``, ``vod_year`` and ``vod_remarks``.
			Entries without an id are skipped, as are - while ``filterate`` is
			on - entries whose type matches 伦理 / 倫理 / 福利.
		"""
		# Pass 1: ids from the listing. Entries with a missing or empty <id>
		# are skipped instead of reusing the previous entry's id.
		ids = []
		for listNode in html:
			for video in listNode:
				listed = {child.tag: child.text for child in video}
				if listed.get('id'):
					ids.append(listed['id'])
		videos = []
		if not ids:
			return videos
		url='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids='+','.join(ids)
		xmlTxt=self.custom_webReadFile(urlStr=url)
		# Pass 2: details. The field map is rebuilt for every video so that a
		# missing tag yields '' rather than leaking the previous video's value.
		for listNode in fromstring(xmlTxt).iter('list'):
			for video in listNode:
				fields = {child.tag: child.text or '' for child in video}
				vodId = fields.get('id', '')
				if not vodId:
					continue
				typeName = fields.get('type', '')
				if self.filterate and self.custom_RegexGetText(Text=typeName,RegexText=r'(伦理|倫理|福利)',Index=1)!='':
					continue
				title = fields.get('name', '')
				img = fields.get('pic', '')
				videos.append({
					"vod_id":'{0}###{1}###{2}'.format(title,vodId,img),
					"vod_name":title,
					"vod_pic":img,
					"vod_year":fields.get('year', ''),
					"vod_remarks":fields.get('note', '')
				})
		return videos
		#访问网页
	def custom_webReadFile(self,urlStr,header=None,codeName='utf-8',timeout=30):
		"""Download ``urlStr`` and return the response body as text.

		Args:
			urlStr: address to request.
			header: request headers. When ``None``, a Referer (the URL
				itself), a desktop-browser User-Agent and a Host header are
				sent.
			codeName: encoding used to decode the response body.
			timeout: seconds before the request is abandoned. The original
				call passed no timeout, so a stalled server could block the
				caller indefinitely.

		Returns:
			The decoded response body.

		Raises:
			urllib.error.URLError: on connection or HTTP errors.
			UnicodeDecodeError: if the body is not valid ``codeName``.
		"""
		if header is None:
			header = {
				"Referer":urlStr,
				'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36'
			}
			# urlsplit isolates the authority part even when the URL has no
			# path (e.g. "https://host?x=1"), which the previous regex
			# captured together with the query string.
			host = urllib.parse.urlsplit(urlStr).netloc
			if host:
				header["Host"] = host
		req = urllib.request.Request(url=urlStr,headers=header)
		with urllib.request.urlopen(req, timeout=timeout) as response:
			return response.read().decode(codeName)
	
	#取剧集区
	def custom_lineList(self,Txt,mark,after,minStart=9):
		"""Cut ``Txt`` into the segments that start at ``mark`` and end before ``after``.

		Args:
			Txt: text to scan.
			mark: marker that opens a segment (kept in the segment).
			after: marker that closes a segment (not kept).
			minStart: scanning stops as soon as a ``mark`` is found at an
				offset below this value. The default 9 reproduces the
				original hard-coded ``origin > 8`` test.
				NOTE(review): that threshold may have been meant as "found
				at all" (``> -1``); pass ``minStart=0`` for that behaviour.

		Returns:
			The list of segments, in order of appearance.
		"""
		floor = max(minStart, 0)  # never treat find()'s -1 as a hit
		circuit = []
		origin = Txt.find(mark)
		while origin >= floor:
			# Look for the terminator strictly after the marker position;
			# searching from ``origin`` itself looped forever whenever
			# ``after`` matched right there (e.g. mark == after).
			end = Txt.find(after, origin + 1)
			if end < 0:
				# Unterminated last segment: keep it whole. The original
				# sliced with -1 and silently dropped the final character.
				circuit.append(Txt[origin:])
				break
			circuit.append(Txt[origin:end])
			origin = Txt.find(mark, end)
		return circuit
	#正则取文本,返回数组	
	def custom_RegexGetTextLine(self,Text,RegexText,Index):
		"""Return every match of ``RegexText`` in ``Text`` as a new list.

		The search uses ``re.M | re.S``. Items are whatever ``findall``
		yields: strings for zero or one group, tuples for several groups.

		Args:
			Text: text to search.
			RegexText: regular-expression pattern.
			Index: accepted but not used.
		"""
		return list(re.findall(RegexText, Text, re.M|re.S))
	#取集数
	def custom_EpisodesList(self,html):
		"""Split a ``title$url#title$url...`` string into ``title$url`` entries.

		Args:
			html: episode string with entries separated by ``#`` and title and
				address separated by ``$``.

		Returns:
			The ``title$url`` entries whose address is not empty. Segments
			without a ``$`` (an empty input, a trailing ``#``) are skipped;
			they used to raise IndexError.
		"""
		videos = []
		for segment in html.split('#'):
			pieces = segment.split('$')
			if len(pieces) < 2:
				continue
			title, url = pieces[0], pieces[1]
			if not url:
				continue
			videos.append(title+"$"+url)
		return videos
	#取分类
	def custom_classification(self):
		"""Download the category list and return it as ``{name: id}``.

		Every child of each ``<class>`` node contributes its text as the name
		and its ``id`` attribute as the value, except names matching
		福利 / 倫理片 / 伦理片. Each kept pair is also printed as
		``'name':'id',``.
		"""
		xmlTxt = self.custom_webReadFile(urlStr='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/')
		feedRoot = fromstring(xmlTxt)
		temporaryClass = {}
		for classNode in feedRoot.iter('class'):
			for entry in classNode:
				label = entry.text
				blocked = self.custom_RegexGetText(Text=label,RegexText=r'(福利|倫理片|伦理片)',Index=1)
				if blocked == '':
					temporaryClass[label] = entry.attrib['id']
					print("'{0}':'{1}',".format(label,entry.attrib['id']))
		return temporaryClass

# T=Spider()
# T. homeContent(filter=False)
# T.custom_classification()
# l=T.homeVideoContent()
# l=T.searchContent(key='柯南',quick='')
# l=T.categoryContent(tid='22',pg='1',filter=False,extend={})
# for x in l['list']:
# 	print(x['vod_name'])
# mubiao= l['list'][2]['vod_id']
# # print(mubiao)
# playTabulation=T.detailContent(array=[mubiao,])
# # print(playTabulation)
# vod_play_from=playTabulation['list'][0]['vod_play_from']
# vod_play_url=playTabulation['list'][0]['vod_play_url']
# url=vod_play_url.split('$$$')
# vod_play_from=vod_play_from.split('$$$')[0]
# url=url[0].split('$')
# url=url[1].split('#')[0]
# # print(url)
# m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
# print(m3u8)