encode.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. # File : encode.py
  4. # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
  5. # Date : 2022/8/29
  6. import base64
  7. import math
  8. import re
  9. from urllib.parse import urljoin,quote,unquote
  10. from js2py.base import PyJsString
  11. import requests,warnings
  12. # 关闭警告
  13. warnings.filterwarnings("ignore")
  14. from requests.packages import urllib3
  15. urllib3.disable_warnings()
  16. import requests.utils
  17. import hashlib
  18. from time import sleep
  19. import os
  20. from utils.web import UC_UA,PC_UA
  21. from ast import literal_eval
  22. from utils.log import logger
  23. import quickjs
  24. def getPreJs():
  25. base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目
  26. lib_path = os.path.join(base_path, f'libs/pre.js')
  27. with open(lib_path,encoding='utf-8') as f:
  28. code = f.read()
  29. return code
  30. def getCryptoJS():
  31. base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目
  32. os.makedirs(os.path.join(base_path, f'libs'), exist_ok=True)
  33. lib_path = os.path.join(base_path, f'libs/crypto-hiker.js')
  34. # print('加密库地址:', lib_path)
  35. if not os.path.exists(lib_path):
  36. return 'undefiend'
  37. with open(lib_path,encoding='utf-8') as f:
  38. code = f.read()
  39. return code
  40. def md5(str):
  41. return hashlib.md5(str.encode(encoding='UTF-8')).hexdigest()
  42. def getLib(js):
  43. base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目录
  44. lib_path = os.path.join(base_path, f'libs/{js}')
  45. if not os.path.exists(lib_path):
  46. return ''
  47. with open(lib_path,encoding='utf-8') as f:
  48. return f.read()
  49. # def atob(text):
  50. # if isinstance(text,PyJsString):
  51. # text = parseText(str(text))
  52. # qjs = quickjs.Context()
  53. # print(text)
  54. # js = getLib('atob.js')
  55. # print(js)
  56. # ret = qjs.eval(f'{js};atob("{text}")')
  57. # print(ret)
  58. def atob(text):
  59. """
  60. 解码
  61. :param text:
  62. :return:
  63. """
  64. if isinstance(text,PyJsString):
  65. text = parseText(str(text))
  66. return base64.b64decode(text.encode("utf8")).decode("latin1")
  67. def btoa(text):
  68. """
  69. 编码
  70. :param text:
  71. :return:
  72. """
  73. if isinstance(text,PyJsString):
  74. text = parseText(str(text))
  75. return base64.b64encode(text.encode("latin1")).decode("utf8")
  76. def requireCache(lib:str):
  77. base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目
  78. os.makedirs(os.path.join(base_path, f'libs'), exist_ok=True)
  79. logger.info(f'开始加载:{lib}')
  80. code = 'undefiend'
  81. if not lib.startswith('http'):
  82. lib_path = os.path.join(base_path, f'libs/{lib}')
  83. if not os.path.exists(lib_path):
  84. pass
  85. else:
  86. with open(lib_path, encoding='utf-8') as f:
  87. code = f.read()
  88. else:
  89. lib_path = os.path.join(base_path, f'libs/{md5(lib)}.js')
  90. if not os.path.exists(lib_path):
  91. try:
  92. r = requests.get(lib,headers={
  93. 'Referer': lib,
  94. 'User-Agent': UC_UA,
  95. },timeout=5,verify=False)
  96. with open(lib_path,mode='wb+') as f:
  97. f.write(r.content)
  98. code = r.text
  99. except Exception as e:
  100. print(f'获取远程依赖失败:{e}')
  101. else:
  102. with open(lib_path,encoding='utf-8') as f:
  103. code = f.read()
  104. # print(code)
  105. return code
  106. def getHome(url):
  107. # http://www.baidu.com:9000/323
  108. urls = url.split('//')
  109. homeUrl = urls[0] + '//' + urls[1].split('/')[0]
  110. return homeUrl
  111. class OcrApi:
  112. def __init__(self,api):
  113. self.api = api
  114. def classification(self,img):
  115. try:
  116. code = requests.post(self.api,data=img,headers={'user-agent':PC_UA},verify=False).text
  117. except Exception as e:
  118. print(f'ocr识别发生错误:{e}')
  119. code = ''
  120. return code
  121. def verifyCode(url,headers,timeout=5,total_cnt=3,api=None):
  122. if not api:
  123. # api = 'http://192.168.3.224:9000/api/ocr_img'
  124. api = 'http://dm.mudery.com:10000'
  125. lower_keys = list(map(lambda x: x.lower(), headers.keys()))
  126. host = getHome(url)
  127. if not 'referer' in lower_keys:
  128. headers['Referer'] = host
  129. print(f'开始自动过验证,请求头:{headers}')
  130. cnt = 0
  131. ocr = OcrApi(api)
  132. while cnt < total_cnt:
  133. s = requests.session()
  134. try:
  135. img = s.get(url=f"{host}/index.php/verify/index.html", headers=headers,timeout=timeout,verify=False).content
  136. code = ocr.classification(img)
  137. print(f'第{cnt+1}次验证码识别结果:{code}')
  138. res = s.post(
  139. url=f"{host}/index.php/ajax/verify_check?type=search&verify={code}",
  140. headers=headers).json()
  141. if res["msg"] == "ok":
  142. cookies_dict = requests.utils.dict_from_cookiejar(s.cookies)
  143. cookie_str = ';'.join([f'{k}={cookies_dict[k]}' for k in cookies_dict])
  144. # return cookies_dict
  145. return cookie_str
  146. except:
  147. print(f'第{cnt+1}次验证码提交失败')
  148. pass
  149. cnt += 1
  150. sleep(1)
  151. return ''
  152. def base64Encode(text):
  153. if isinstance(text,PyJsString):
  154. text = str(text).replace("'","").replace('"','')
  155. return base64.b64encode(text.encode("utf8")).decode("utf-8") #base64编码
  156. def base64Decode(text):
  157. if isinstance(text,PyJsString):
  158. text = parseText(str(text))
  159. # print(text)
  160. return base64.b64decode(text).decode("utf-8") #base64解码
  161. def parseText(text:str):
  162. text = text.replace('false','False').replace('true','True').replace('null','None')
  163. # print(text)
  164. return literal_eval(text)
  165. def setDetail(title:str,img:str,desc:str,content:str,tabs:list=None,lists:list=None):
  166. vod = {
  167. "vod_name": title.split('/n')[0],
  168. "vod_pic": img,
  169. "type_name": title,
  170. "vod_year": "",
  171. "vod_area": "",
  172. "vod_remarks": desc,
  173. "vod_actor": "",
  174. "vod_director": "",
  175. "vod_content": content
  176. }
  177. return vod
  178. def urljoin2(a,b):
  179. a = str(a).replace("'",'').replace('"','')
  180. b = str(b).replace("'",'').replace('"','')
  181. # print(type(a),a)
  182. # print(type(b),b)
  183. ret = urljoin(a,b)
  184. return ret
  185. def join(lists,string):
  186. """
  187. 残废函数,没法使用
  188. :param lists:
  189. :param string:
  190. :return:
  191. """
  192. # FIXME
  193. lists1 = lists.to_list()
  194. string1 = str(string)
  195. print(type(lists1),lists1)
  196. print(type(string1),string1)
  197. try:
  198. ret = string1.join(lists1)
  199. print(ret)
  200. return ret
  201. except Exception as e:
  202. print(e)
  203. return ''
  204. def dealObj(obj=None):
  205. if not obj:
  206. obj = {}
  207. encoding = obj.get('encoding') or 'utf-8'
  208. encoding = str(encoding).replace("'", "")
  209. # encoding = parseText(str(encoding))
  210. method = obj.get('method') or 'get'
  211. method = str(method).replace("'", "")
  212. # method = parseText(str(method))
  213. withHeaders = obj.get('withHeaders') or ''
  214. withHeaders = str(withHeaders).replace("'", "")
  215. # withHeaders = parseText(str(withHeaders))
  216. # print(type(url),url)
  217. # headers = dict(obj.get('headers')) if obj.get('headers') else {}
  218. # headers = obj.get('headers').to_dict() if obj.get('headers') else {}
  219. headers = obj.get('headers') if obj.get('headers') else {}
  220. new_headers = {}
  221. # print(type(headers),headers)
  222. for i in headers:
  223. new_headers[str(i).replace("'", "")] = str(headers[i]).replace("'", "")
  224. # print(type(new_headers), new_headers)
  225. timeout = float(obj.get('timeout').to_int()) if obj.get('timeout') else None
  226. # print(type(timeout), timeout)
  227. body = obj.get('body') if obj.get('body') else {}
  228. # print(body)
  229. # print(type(body))
  230. if isinstance(body,PyJsString):
  231. body = parseText(str(body))
  232. new_dict = {}
  233. new_tmp = body.split('&')
  234. for i in new_tmp:
  235. new_dict[i.split('=')[0]] = i.split('=')[1]
  236. body = new_dict
  237. new_body = {}
  238. for i in body:
  239. new_body[str(i).replace("'", "")] = str(body[i]).replace("'", "")
  240. return {
  241. 'encoding':encoding,
  242. 'headers':new_headers,
  243. 'timeout':timeout,
  244. 'body': new_body,
  245. 'method':method,
  246. 'withHeaders':withHeaders
  247. }
  248. def coverDict2form(data:dict):
  249. forms = []
  250. for k,v in data.items():
  251. forms.append(f'{k}={v}')
  252. return '&'.join(forms)
  253. def base_request(url,obj):
  254. # verify=False 关闭证书验证
  255. # print(obj)
  256. url = str(url).replace("'", "")
  257. method = obj.get('method') or ''
  258. withHeaders = obj.get('withHeaders') or ''
  259. # print(f'withHeaders:{withHeaders}')
  260. if not method:
  261. method = 'get'
  262. obj['method'] = 'method'
  263. # print(obj)
  264. print(f"{method}:{url}:{obj['headers']}:{obj.get('body','')}")
  265. try:
  266. # r = requests.get(url, headers=headers, params=body, timeout=timeout)
  267. if method.lower() == 'get':
  268. r = requests.get(url, headers=obj['headers'], params=obj['body'], timeout=obj['timeout'],verify=False)
  269. else:
  270. # if isinstance(obj['body'],dict):
  271. # obj['body'] = coverDict2form(obj['body'])
  272. # print(obj['body'])
  273. # 亲测不需要转换data 格式的dict 为 form都正常 (gaze规则和奇优搜索)
  274. r = requests.post(url, headers=obj['headers'], data=obj['body'], timeout=obj['timeout'],verify=False)
  275. # r = requests.get(url, timeout=timeout)
  276. # r = requests.get(url)
  277. # print(encoding)
  278. r.encoding = obj['encoding']
  279. # print(f'源码:{r.text}')
  280. if withHeaders:
  281. backObj = {
  282. 'url':r.url,
  283. 'body':r.text,
  284. 'headers':r.headers
  285. }
  286. return backObj
  287. else:
  288. return r.text
  289. except Exception as e:
  290. print(f'{method}请求发生错误:{e}')
  291. return {} if withHeaders else ''
  292. def fetch(url,obj):
  293. # print('fetch')
  294. obj = dealObj(obj)
  295. if not obj.get('headers') or not any([obj['headers'].get('User-Agent'),obj['headers'].get('user-agent')]):
  296. obj['headers']['User-Agent'] = obj['headers'].get('user-agent',PC_UA)
  297. return base_request(url,obj)
  298. def post(url,obj):
  299. obj = dealObj(obj)
  300. obj['method'] = 'post'
  301. return base_request(url,obj)
  302. def request(url,obj):
  303. obj = dealObj(obj)
  304. # print(f'{method}:{url}')
  305. if not obj.get('headers') or not any([obj['headers'].get('User-Agent'),obj['headers'].get('user-agent')]):
  306. obj['headers']['User-Agent'] = obj['headers'].get('user-agent',UC_UA)
  307. return base_request(url, obj)
  308. def redx(text):
  309. """
  310. 修正js2py交互的字符串自动加前后引号问题
  311. :param text:
  312. :return:
  313. """
  314. # return text.replace("'", "").replace('"', "")
  315. text = str(text)
  316. if text.startswith("'") and text.endswith("'"):
  317. text = text[1:-1]
  318. return text
  319. def buildUrl(url,obj=None):
  320. # url = str(url).replace("'", "")
  321. url = redx(url)
  322. if not obj:
  323. obj = {}
  324. new_obj = {}
  325. for i in obj:
  326. # new_obj[str(i).replace("'", "")] = str(obj[i]).replace("'", "")
  327. new_obj[redx(i)] = redx(obj[i])
  328. if str(url).find('?') < 0:
  329. url = str(url) + '?'
  330. param_list = [f'{i}={new_obj[i]}' for i in new_obj]
  331. # print(param_list)
  332. prs = '&'.join(param_list)
  333. if len(new_obj) > 0 and not str(url).endswith('?'):
  334. url += '&'
  335. # url = (url + prs).replace('"','').replace("'",'')
  336. url = url + prs
  337. # print(url)
  338. return url
  339. def forceOrder(lists:list,key:str=None,option=None):
  340. """
  341. 强制正序
  342. :param lists:
  343. :param key:
  344. :return:
  345. """
  346. start = math.floor(len(lists)/2)
  347. end = min(len(lists)-1,start+1)
  348. if start >= end:
  349. return lists
  350. first = lists[start]
  351. second = lists[end]
  352. if key:
  353. try:
  354. first = first[key]
  355. second = second[key]
  356. except:
  357. pass
  358. if option and hasattr(option, '__call__'):
  359. try:
  360. first = option(first)
  361. second = option(second)
  362. # print(f'first:{first},second:{second}')
  363. except Exception as e:
  364. print(f'强制排序执行option发生了错误:{e}')
  365. first = str(first)
  366. second = str(second)
  367. if re.search(r'(\d+)',first) and re.search(r'(\d+)',second):
  368. num1 = int(re.search(r'(\d+)',first).groups()[0])
  369. num2 = int(re.search(r'(\d+)',second).groups()[0])
  370. if num1 > num2:
  371. lists.reverse()
  372. return lists
  373. def base64ToImage(image_base64:str):
  374. if isinstance(image_base64,PyJsString):
  375. image_base64 = parseText(str(image_base64))
  376. if ',' in image_base64:
  377. image_base64 = image_base64.split(',')[1]
  378. img_data = base64.b64decode(image_base64)
  379. return img_data