encode.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. # File : encode.py
  4. # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
  5. # Date : 2022/8/29
  6. import base64
  7. import math
  8. import re
  9. from urllib.parse import urljoin,quote,unquote
  10. from js2py.base import PyJsString
  11. import requests,warnings
  12. # 关闭警告
  13. warnings.filterwarnings("ignore")
  14. requests.packages.urllib3.disable_warnings()
  15. import requests.utils
  16. import hashlib
  17. from time import sleep
  18. import os
  19. from utils.web import UC_UA,PC_UA
  20. from ast import literal_eval
  21. from utils.log import logger
  22. import quickjs
  23. def getPreJs():
  24. base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目
  25. lib_path = os.path.join(base_path, f'libs/pre.js')
  26. with open(lib_path,encoding='utf-8') as f:
  27. code = f.read()
  28. return code
  29. def getCryptoJS():
  30. base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目
  31. os.makedirs(os.path.join(base_path, f'libs'), exist_ok=True)
  32. lib_path = os.path.join(base_path, f'libs/crypto-hiker.js')
  33. # print('加密库地址:', lib_path)
  34. if not os.path.exists(lib_path):
  35. return 'undefiend'
  36. with open(lib_path,encoding='utf-8') as f:
  37. code = f.read()
  38. return code
  39. def md5(str):
  40. return hashlib.md5(str.encode(encoding='UTF-8')).hexdigest()
  41. def getLib(js):
  42. base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目录
  43. lib_path = os.path.join(base_path, f'libs/{js}')
  44. if not os.path.exists(lib_path):
  45. return ''
  46. with open(lib_path,encoding='utf-8') as f:
  47. return f.read()
  48. # def atob(text):
  49. # if isinstance(text,PyJsString):
  50. # text = parseText(str(text))
  51. # qjs = quickjs.Context()
  52. # print(text)
  53. # js = getLib('atob.js')
  54. # print(js)
  55. # ret = qjs.eval(f'{js};atob("{text}")')
  56. # print(ret)
  57. def atob(text):
  58. """
  59. 解码
  60. :param text:
  61. :return:
  62. """
  63. if isinstance(text,PyJsString):
  64. text = parseText(str(text))
  65. return base64.b64decode(text.encode("utf8")).decode("latin1")
  66. def btoa(text):
  67. """
  68. 编码
  69. :param text:
  70. :return:
  71. """
  72. if isinstance(text,PyJsString):
  73. text = parseText(str(text))
  74. return base64.b64encode(text.encode("latin1")).decode("utf8")
  75. def requireCache(lib:str):
  76. base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目
  77. os.makedirs(os.path.join(base_path, f'libs'), exist_ok=True)
  78. logger.info(f'开始加载:{lib}')
  79. code = 'undefiend'
  80. if not lib.startswith('http'):
  81. lib_path = os.path.join(base_path, f'libs/{lib}')
  82. if not os.path.exists(lib_path):
  83. pass
  84. else:
  85. with open(lib_path, encoding='utf-8') as f:
  86. code = f.read()
  87. else:
  88. lib_path = os.path.join(base_path, f'libs/{md5(lib)}.js')
  89. if not os.path.exists(lib_path):
  90. try:
  91. r = requests.get(lib,headers={
  92. 'Referer': lib,
  93. 'User-Agent': UC_UA,
  94. },timeout=5,verify=False)
  95. with open(lib_path,mode='wb+') as f:
  96. f.write(r.content)
  97. code = r.text
  98. except Exception as e:
  99. print(f'获取远程依赖失败:{e}')
  100. else:
  101. with open(lib_path,encoding='utf-8') as f:
  102. code = f.read()
  103. # print(code)
  104. return code
  105. def getHome(url):
  106. # http://www.baidu.com:9000/323
  107. urls = url.split('//')
  108. homeUrl = urls[0] + '//' + urls[1].split('/')[0]
  109. return homeUrl
  110. class OcrApi:
  111. def __init__(self,api):
  112. self.api = api
  113. def classification(self,img):
  114. try:
  115. # code = requests.post(self.api,data=img,headers={'user-agent':PC_UA},verify=False).text
  116. code = requests.post(self.api,data=base64.b64encode(img).decode(),headers={'user-agent':PC_UA},verify=False).text
  117. except Exception as e:
  118. print(f'ocr识别发生错误:{e}')
  119. code = ''
  120. return code
  121. def verifyCode(url,headers,timeout=5,total_cnt=3,api=None):
  122. if not api:
  123. # api = 'http://192.168.3.224:9000/api/ocr_img'
  124. api = 'https://api.nn.ci/ocr/b64/text'
  125. lower_keys = list(map(lambda x: x.lower(), headers.keys()))
  126. host = getHome(url)
  127. if not 'referer' in lower_keys:
  128. headers['Referer'] = host
  129. print(f'开始自动过验证,请求头:{headers}')
  130. cnt = 0
  131. ocr = OcrApi(api)
  132. while cnt < total_cnt:
  133. s = requests.session()
  134. try:
  135. img = s.get(url=f"{host}/index.php/verify/index.html", headers=headers,timeout=timeout,verify=False).content
  136. code = ocr.classification(img)
  137. print(f'第{cnt+1}次验证码识别结果:{code}')
  138. res = s.post(
  139. url=f"{host}/index.php/ajax/verify_check?type=search&verify={code}",
  140. headers=headers).json()
  141. if res["msg"] == "ok":
  142. cookies_dict = requests.utils.dict_from_cookiejar(s.cookies)
  143. cookie_str = ';'.join([f'{k}={cookies_dict[k]}' for k in cookies_dict])
  144. # return cookies_dict
  145. return cookie_str
  146. except:
  147. print(f'第{cnt+1}次验证码提交失败')
  148. pass
  149. cnt += 1
  150. sleep(1)
  151. return ''
  152. def base64Encode(text):
  153. if isinstance(text,PyJsString):
  154. text = str(text).replace("'","").replace('"','')
  155. return base64.b64encode(text.encode("utf8")).decode("utf-8") #base64编码
  156. def base64Decode(text):
  157. if isinstance(text,PyJsString):
  158. text = parseText(str(text))
  159. # print(text)
  160. return base64.b64decode(text).decode("utf-8") #base64解码
  161. def encodeStr(input, encoding='GBK'):
  162. """
  163. 指定字符串编码
  164. :param input:
  165. :param encoding:
  166. :return:
  167. """
  168. if isinstance(input,PyJsString):
  169. input = parseText(str(input))
  170. if isinstance(encoding,PyJsString):
  171. encoding = parseText(str(input))
  172. return quote(input.encode(encoding, 'ignore'))
  173. def decodeStr(input, encoding='GBK'):
  174. """
  175. 指定字符串解码
  176. :param input:
  177. :param encoding:
  178. :return:
  179. """
  180. if isinstance(input,PyJsString):
  181. input = parseText(str(input))
  182. if isinstance(encoding,PyJsString):
  183. encoding = parseText(str(input))
  184. return unquote(input,encoding)
  185. def parseText(text:str):
  186. text = text.replace('false','False').replace('true','True').replace('null','None')
  187. # print(text)
  188. return literal_eval(text)
  189. def setDetail(title:str,img:str,desc:str,content:str,tabs:list=None,lists:list=None):
  190. vod = {
  191. "vod_name": title.split('/n')[0],
  192. "vod_pic": img,
  193. "type_name": title,
  194. "vod_year": "",
  195. "vod_area": "",
  196. "vod_remarks": desc,
  197. "vod_actor": "",
  198. "vod_director": "",
  199. "vod_content": content
  200. }
  201. return vod
  202. def urljoin2(a,b):
  203. a = str(a).replace("'",'').replace('"','')
  204. b = str(b).replace("'",'').replace('"','')
  205. # print(type(a),a)
  206. # print(type(b),b)
  207. ret = urljoin(a,b)
  208. return ret
  209. def join(lists,string):
  210. """
  211. 残废函数,没法使用
  212. :param lists:
  213. :param string:
  214. :return:
  215. """
  216. # FIXME
  217. lists1 = lists.to_list()
  218. string1 = str(string)
  219. print(type(lists1),lists1)
  220. print(type(string1),string1)
  221. try:
  222. ret = string1.join(lists1)
  223. print(ret)
  224. return ret
  225. except Exception as e:
  226. print(e)
  227. return ''
  228. def dealObj(obj=None):
  229. if not obj:
  230. obj = {}
  231. encoding = obj.get('encoding') or 'utf-8'
  232. encoding = str(encoding).replace("'", "")
  233. # encoding = parseText(str(encoding))
  234. method = obj.get('method') or 'get'
  235. method = str(method).replace("'", "")
  236. # method = parseText(str(method))
  237. withHeaders = obj.get('withHeaders') or ''
  238. withHeaders = str(withHeaders).replace("'", "")
  239. # withHeaders = parseText(str(withHeaders))
  240. # print(type(url),url)
  241. # headers = dict(obj.get('headers')) if obj.get('headers') else {}
  242. # headers = obj.get('headers').to_dict() if obj.get('headers') else {}
  243. headers = obj.get('headers') if obj.get('headers') else {}
  244. new_headers = {}
  245. # print(type(headers),headers)
  246. for i in headers:
  247. new_headers[str(i).replace("'", "")] = str(headers[i]).replace("'", "")
  248. # print(type(new_headers), new_headers)
  249. timeout = float(obj.get('timeout').to_int()) if obj.get('timeout') else None
  250. # print(type(timeout), timeout)
  251. body = obj.get('body') if obj.get('body') else {}
  252. # print(body)
  253. # print(type(body))
  254. if isinstance(body,PyJsString):
  255. body = parseText(str(body))
  256. new_dict = {}
  257. new_tmp = body.split('&')
  258. for i in new_tmp:
  259. new_dict[i.split('=')[0]] = i.split('=')[1]
  260. body = new_dict
  261. new_body = {}
  262. for i in body:
  263. new_body[str(i).replace("'", "")] = str(body[i]).replace("'", "")
  264. return {
  265. 'encoding':encoding,
  266. 'headers':new_headers,
  267. 'timeout':timeout,
  268. 'body': new_body,
  269. 'method':method,
  270. 'withHeaders':withHeaders
  271. }
  272. def coverDict2form(data:dict):
  273. forms = []
  274. for k,v in data.items():
  275. forms.append(f'{k}={v}')
  276. return '&'.join(forms)
  277. def base_request(url,obj):
  278. # verify=False 关闭证书验证
  279. # print(obj)
  280. url = str(url).replace("'", "")
  281. method = obj.get('method') or ''
  282. withHeaders = obj.get('withHeaders') or ''
  283. # print(f'withHeaders:{withHeaders}')
  284. if not method:
  285. method = 'get'
  286. obj['method'] = 'method'
  287. # print(obj)
  288. print(f"{method}:{url}:{obj['headers']}:{obj.get('body','')},请求超时:{obj['timeout']}")
  289. try:
  290. # r = requests.get(url, headers=headers, params=body, timeout=timeout)
  291. if method.lower() == 'get':
  292. r = requests.get(url, headers=obj['headers'], params=obj['body'], timeout=obj['timeout'],verify=False)
  293. else:
  294. # if isinstance(obj['body'],dict):
  295. # obj['body'] = coverDict2form(obj['body'])
  296. # print(obj['body'])
  297. # 亲测不需要转换data 格式的dict 为 form都正常 (gaze规则和奇优搜索)
  298. r = requests.post(url, headers=obj['headers'], data=obj['body'], timeout=obj['timeout'],verify=False)
  299. # r = requests.get(url, timeout=timeout)
  300. # r = requests.get(url)
  301. # print(encoding)
  302. r.encoding = obj['encoding']
  303. # print(f'源码:{r.text}')
  304. if withHeaders:
  305. backObj = {
  306. 'url':r.url,
  307. 'body':r.text,
  308. 'headers':r.headers
  309. }
  310. return backObj
  311. else:
  312. return r.text
  313. except Exception as e:
  314. print(f'{method}请求发生错误:{e}')
  315. return {} if withHeaders else ''
  316. def fetch(url,obj):
  317. # print('fetch')
  318. obj = dealObj(obj)
  319. if not obj.get('headers') or not any([obj['headers'].get('User-Agent'),obj['headers'].get('user-agent')]):
  320. obj['headers']['User-Agent'] = obj['headers'].get('user-agent',PC_UA)
  321. return base_request(url,obj)
  322. def post(url,obj):
  323. obj = dealObj(obj)
  324. obj['method'] = 'post'
  325. return base_request(url,obj)
  326. def request(url,obj):
  327. obj = dealObj(obj)
  328. # print(f'{method}:{url}')
  329. if not obj.get('headers') or not any([obj['headers'].get('User-Agent'),obj['headers'].get('user-agent')]):
  330. obj['headers']['User-Agent'] = obj['headers'].get('user-agent',UC_UA)
  331. return base_request(url, obj)
  332. def redx(text):
  333. """
  334. 修正js2py交互的字符串自动加前后引号问题
  335. :param text:
  336. :return:
  337. """
  338. # return text.replace("'", "").replace('"', "")
  339. text = str(text)
  340. if text.startswith("'") and text.endswith("'"):
  341. text = text[1:-1]
  342. return text
  343. def buildUrl(url,obj=None):
  344. # url = str(url).replace("'", "")
  345. url = redx(url)
  346. if not obj:
  347. obj = {}
  348. new_obj = {}
  349. for i in obj:
  350. # new_obj[str(i).replace("'", "")] = str(obj[i]).replace("'", "")
  351. new_obj[redx(i)] = redx(obj[i])
  352. if str(url).find('?') < 0:
  353. url = str(url) + '?'
  354. param_list = [f'{i}={new_obj[i]}' for i in new_obj]
  355. # print(param_list)
  356. prs = '&'.join(param_list)
  357. if len(new_obj) > 0 and not str(url).endswith('?'):
  358. url += '&'
  359. # url = (url + prs).replace('"','').replace("'",'')
  360. url = url + prs
  361. # print(url)
  362. return url
  363. def forceOrder(lists:list,key:str=None,option=None):
  364. """
  365. 强制正序
  366. :param lists:
  367. :param key:
  368. :return:
  369. """
  370. start = math.floor(len(lists)/2)
  371. end = min(len(lists)-1,start+1)
  372. if start >= end:
  373. return lists
  374. first = lists[start]
  375. second = lists[end]
  376. if key:
  377. try:
  378. first = first[key]
  379. second = second[key]
  380. except:
  381. pass
  382. if option and hasattr(option, '__call__'):
  383. try:
  384. first = option(first)
  385. second = option(second)
  386. # print(f'first:{first},second:{second}')
  387. except Exception as e:
  388. print(f'强制排序执行option发生了错误:{e}')
  389. first = str(first)
  390. second = str(second)
  391. if re.search(r'(\d+)',first) and re.search(r'(\d+)',second):
  392. num1 = int(re.search(r'(\d+)',first).groups()[0])
  393. num2 = int(re.search(r'(\d+)',second).groups()[0])
  394. if num1 > num2:
  395. lists.reverse()
  396. return lists
  397. def base64ToImage(image_base64:str):
  398. if isinstance(image_base64,PyJsString):
  399. image_base64 = parseText(str(image_base64))
  400. if ',' in image_base64:
  401. image_base64 = image_base64.split(',')[1]
  402. img_data = base64.b64decode(image_base64)
  403. return img_data