123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292 |
- // async function request(url, options) {
- // try {
- // log('rule:',typeof rule);
- // log('headers:',rule.headers);
- // log('title:',rule.title);
- // log('getHome:',typeof getHome);
- // log('gzip',typeof(gzip))
- // log('fetch_params',typeof(fetch_params))
- // return (await req(url, options)).content
- // } catch (e) {
- // log(`requestHtml error:${e.message}`);
- // return ''
- // }
- // }
- // var key = '源的唯一ID' // 允许在源里自定义设置key,不设置就自动取title或者host
- const RKEY = typeof (key) !== 'undefined' && key ? key : 'drpyS_' + (rule.title || rule.host); // 源的唯一标识
- /**
- * 海阔网页请求函数完整封装
- * @param url 请求链接
- * @param obj 请求对象 {headers:{},method:'',timeout:5000,body:'',withHeaders:false}
- * @param ocr_flag 标识此flag是用于请求ocr识别的,自动过滤content-type指定编码
- * @returns {string|string|DocumentFragment|*}
- */
- async function request(url, obj, ocr_flag) {
- ocr_flag = ocr_flag || false;
- if (typeof (obj) === 'undefined' || !obj || obj === {}) {
- let fetch_params = {};
- let headers = {
- 'User-Agent': MOBILE_UA,
- };
- if (rule.headers) {
- Object.assign(headers, rule.headers);
- }
- let keys = Object.keys(headers).map(it => it.toLowerCase());
- if (!keys.includes('referer')) {
- headers['Referer'] = getHome(url);
- }
- fetch_params.headers = headers;
- obj = fetch_params;
- } else {
- let headers = obj.headers || {};
- let keys = Object.keys(headers).map(it => it.toLowerCase());
- if (!keys.includes('user-agent')) {
- headers['User-Agent'] = MOBILE_UA;
- }
- if (!keys.includes('referer')) {
- headers['Referer'] = getHome(url);
- }
- obj.headers = headers;
- }
- if (rule.encoding && rule.encoding !== 'utf-8' && !ocr_flag) {
- if (!obj.headers.hasOwnProperty('Content-Type') && !obj.headers.hasOwnProperty('content-type')) { // 手动指定了就不管
- obj.headers["Content-Type"] = 'text/html; charset=' + rule.encoding;
- }
- }
- if (typeof (obj.body) != 'undefined' && obj.body && typeof (obj.body) === 'string') {
- // 传body加 "Content-Type":"application/x-www-form-urlencoded;" 即可post form
- if (!obj.headers.hasOwnProperty('Content-Type') && !obj.headers.hasOwnProperty('content-type')) { // 手动指定了就不管
- obj.headers["Content-Type"] = 'application/x-www-form-urlencoded; charset=' + rule.encoding;
- }
- } else if (typeof (obj.body) != 'undefined' && obj.body && typeof (obj.body) === 'object') {
- obj.data = obj.body;
- delete obj.body
- }
- if (!url) {
- return obj.withHeaders ? '{}' : ''
- }
- if (obj.toBase64) { // 返回base64,用于请求图片
- obj.buffer = 2;
- delete obj.toBase64
- }
- if (obj.redirect === false) {
- obj.redirect = 0;
- }
- if (obj.headers.hasOwnProperty('Content-Type') || obj.headers.hasOwnProperty('content-type')) {
- let _contentType = obj.headers["Content-Type"] || obj.headers["content-type"] || "";
- if (_contentType.includes("application/x-www-form-urlencoded")) {
- log("custom body is application/x-www-form-urlencoded");
- if (typeof obj.body == "string") {
- let temp_obj = parseQueryString(obj.body);
- console.log(JSON.stringify(temp_obj));
- }
- }
- }
- console.log(JSON.stringify(obj.headers));
- console.log('request:' + url + ` |method:${obj.method || 'GET'} |body:${obj.body || ''}`);
- let res = await req(url, obj);
- let html = res.content || '';
- if (obj.withHeaders) {
- let htmlWithHeaders = res.headers;
- htmlWithHeaders.body = html;
- return JSON.stringify(htmlWithHeaders);
- } else {
- return html
- }
- }
- var fetch = request;
- /**
- * 快捷post请求
- * @param url 地址
- * @param obj 对象
- * @returns {string|DocumentFragment|*}
- */
- async function post(url, obj) {
- obj = obj || {};
- obj.method = 'POST';
- return await request(url, obj);
- }
- /**
- * 快捷获取特殊地址cookie|一般用作搜索过验证
- * 用法 let {cookie,html} = reqCookie(url);
- * @param url 能返回cookie的地址
- * @param obj 常规请求参数
- * @param all_cookie 返回全部cookie.默认false只返回第一个,一般是PhpSessionId
- * @returns {{cookie: string, html: (*|string|DocumentFragment)}}
- */
- async function reqCookie(url, obj, all_cookie) {
- obj = obj || {};
- obj.withHeaders = true;
- all_cookie = all_cookie || false;
- let html = await request(url, obj);
- let json = JSON.parse(html);
- let setCk = Object.keys(json).find(it => it.toLowerCase() === 'set-cookie');
- let cookie = setCk ? json[setCk] : '';
- if (Array.isArray(cookie)) {
- cookie = cookie.join(';')
- }
- if (!all_cookie) {
- cookie = cookie.split(';')[0];
- }
- html = json.body;
- return {
- cookie,
- html
- }
- }
- /**
- * 检查宝塔验证并自动跳过获取正确源码
- * @param html 之前获取的html
- * @param url 之前的来源url
- * @param obj 来源obj
- * @returns {string|DocumentFragment|*}
- */
- async function checkHtml(html, url, obj) {
- if (/\?btwaf=/.test(html)) {
- let btwaf = html.match(/btwaf(.*?)"/)[1];
- url = url.split('#')[0] + '?btwaf' + btwaf;
- log('宝塔验证访问链接:' + url);
- html = await request(url, obj);
- }
- return html
- }
- /**
- * 带一次宝塔验证的源码获取
- * @param url 请求链接
- * @param obj 请求参数
- * @returns {string|DocumentFragment}
- */
- async function getCode(url, obj) {
- let html = await request(url, obj);
- html = checkHtml(html, url, obj);
- return html
- }
- /**
- * 源rule专用的请求方法,自动注入cookie
- * @param url 请求链接
- * @returns {string|DocumentFragment}
- */
- async function getHtml(url) {
- let obj = {};
- if (rule.headers) {
- obj.headers = rule.headers;
- }
- let cookie = getItem(RULE_CK, '');
- if (cookie) {
- // log('有cookie:'+cookie);
- if (obj.headers && !Object.keys(obj.headers).map(it => it.toLowerCase()).includes('cookie')) {
- log('历史无cookie,新增过验证后的cookie');
- obj.headers['Cookie'] = cookie;
- } else if (obj.headers && obj.headers.cookie && obj.headers.cookie !== cookie) {
- obj.headers['Cookie'] = cookie;
- log('历史有小写过期的cookie,更新过验证后的cookie');
- } else if (obj.headers && obj.headers.Cookie && obj.headers.Cookie !== cookie) {
- obj.headers['Cookie'] = cookie;
- log('历史有大写过期的cookie,更新过验证后的cookie');
- } else if (!obj.headers) {
- obj.headers = {Cookie: cookie};
- log('历史无headers,更新过验证后的含cookie的headers');
- }
- }
- let html = getCode(url, obj);
- return html
- }
- /**
- * 验证码识别,暂未实现
- * @param url 验证码图片链接
- * @returns {string} 验证成功后的cookie
- */
- async function verifyCode(url) {
- let cnt = 0;
- let host = getHome(url);
- let cookie = '';
- while (cnt < OCR_RETRY) {
- try {
- // let obj = {headers:headers,timeout:timeout};
- let yzm_url = `${host}/index.php/verify/index.html`;
- console.log(`验证码链接:${yzm_url}`);
- let hhtml = await request(yzm_url, {withHeaders: true, toBase64: true}, true);
- let json = JSON.parse(hhtml);
- if (!cookie) {
- // print(json);
- let setCk = Object.keys(json).find(it => it.toLowerCase() === 'set-cookie');
- // cookie = json['set-cookie']?json['set-cookie'].split(';')[0]:'';
- cookie = setCk ? json[setCk].split(';')[0] : '';
- }
- // console.log(hhtml);
- console.log('cookie:' + cookie);
- let img = json.body;
- // console.log(img);
- let code = await OcrApi.classification(img);
- console.log(`第${cnt + 1}次验证码识别结果:${code}`);
- let submit_url = `${host}/index.php/ajax/verify_check?type=search&verify=${code}`;
- console.log(submit_url);
- let html = await request(submit_url, {headers: {Cookie: cookie}, 'method': 'POST'});
- // console.log(html);
- html = JSON.parse(html);
- if (html.msg === 'ok') {
- console.log(`第${cnt + 1}次验证码提交成功`);
- return cookie // 需要返回cookie
- } else if (html.msg !== 'ok' && cnt + 1 >= OCR_RETRY) {
- cookie = ''; // 需要清空返回cookie
- }
- } catch (e) {
- console.log(`第${cnt + 1}次验证码提交失败:${e.message}`);
- if (cnt + 1 >= OCR_RETRY) {
- cookie = '';
- }
- }
- cnt += 1
- }
- return cookie
- }
- /**
- * 存在数据库配置表里, key字段对应值value,没有就新增,有就更新,调用此方法会清除key对应的内存缓存
- * @param k 键
- * @param v 值
- */
- function setItem(k, v) {
- local.set(RKEY, k, v);
- console.log(`规则${RKEY}设置${k} => ${v}`)
- }
- /**
- * 获取数据库配置表对应的key字段的value,没有这个key就返回value默认传参.需要有缓存,第一次获取后会存在内存里
- * @param k 键
- * @param v 值
- * @returns {*}
- */
- function getItem(k, v) {
- return local.get(RKEY, k) || v;
- }
- /**
- * 删除数据库key对应的一条数据,并清除此key对应的内存缓存
- * @param k
- */
- function clearItem(k) {
- local.delete(RKEY, k);
- }
- // jsp系列函数改到drpyS代码中,执行完rule和预处理过后再次注入,可以保证在rule定义范围外也能使用。这里也可以注释掉,没太多必要
- globalThis.jsp = new jsoup(rule.host || '');
- globalThis.pdfh = pdfh;
- globalThis.pd = pd;
- globalThis.pdfa = pdfa;
- globalThis.setItem = setItem;
- globalThis.getItem = getItem;
- globalThis.clearItem = clearItem;
- globalThis.request = request;
- globalThis.fetch = fetch;
|