req-extend.js 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. // async function request(url, options) {
  2. // try {
  3. // log('rule:',typeof rule);
  4. // log('headers:',rule.headers);
  5. // log('title:',rule.title);
  6. // log('getHome:',typeof getHome);
  7. // log('gzip',typeof(gzip))
  8. // log('fetch_params',typeof(fetch_params))
  9. // return (await req(url, options)).content
  10. // } catch (e) {
  11. // log(`requestHtml error:${e.message}`);
  12. // return ''
  13. // }
  14. // }
  // var key = 'unique source id'
  // A source may define its own `key`; when it does not, the rule title or host is used.
  // RKEY is the unique identifier of the current source.
  const RKEY = (function () {
      if (typeof key !== 'undefined' && key) {
          return key;
      }
      return 'drpyS_' + (rule.title || rule.host);
  })();
  /**
   * Full wrapper around the `req` HTTP helper: fills in default headers,
   * normalises the options and returns the response content.
   *
   * The caller's `obj` and `obj.headers` are never modified; a shallow copy is
   * sent to `req`. (Callers may hand in `rule.headers` directly, so writing
   * Referer / User-Agent into it would leak between requests.)
   *
   * @param url request URL; a falsy url short-circuits without any network call
   * @param obj request options {headers:{},method:'',timeout:5000,body:'',withHeaders:false}.
   *            A missing or empty object means "use the rule defaults"
   *            (MOBILE_UA plus rule.headers).
   * @param ocr_flag set when the request fetches a captcha for OCR; suppresses the
   *                 automatic `text/html; charset=<rule.encoding>` Content-Type
   * @returns {Promise<string>} the response content, or, when obj.withHeaders is set,
   *          a JSON string of the response headers with the content stored under `body`
   */
  async function request(url, obj, ocr_flag) {
      ocr_flag = ocr_flag || false;

      // Bail out before touching getHome(url), which has nothing to work with here.
      if (!url) {
          return obj && obj.withHeaders ? '{}' : '';
      }

      // Header names are case-insensitive, so every lookup goes through this helper.
      const findHeaderKey = (headers, lowerName) =>
          Object.keys(headers).find(it => it.toLowerCase() === lowerName);

      // `obj === {}` can never be true; test for emptiness explicitly.
      const noOptions = !obj || Object.keys(obj).length === 0;
      const opts = Object.assign({}, obj);
      let headers;
      if (noOptions) {
          headers = Object.assign({'User-Agent': MOBILE_UA}, rule.headers);
      } else {
          headers = Object.assign({}, opts.headers);
          if (!findHeaderKey(headers, 'user-agent')) {
              headers['User-Agent'] = MOBILE_UA;
          }
      }
      if (!findHeaderKey(headers, 'referer')) {
          headers['Referer'] = getHome(url);
      }
      opts.headers = headers;

      // A Content-Type supplied by the caller always wins.
      if (rule.encoding && rule.encoding !== 'utf-8' && !ocr_flag && !findHeaderKey(headers, 'content-type')) {
          headers['Content-Type'] = 'text/html; charset=' + rule.encoding;
      }

      if (opts.body && typeof opts.body === 'string') {
          // A string body is posted as a form unless a Content-Type is already present.
          if (!findHeaderKey(headers, 'content-type')) {
              headers['Content-Type'] = rule.encoding
                  ? 'application/x-www-form-urlencoded; charset=' + rule.encoding
                  : 'application/x-www-form-urlencoded';
          }
      } else if (opts.body && typeof opts.body === 'object') {
          // Object bodies are handed to `req` as `data` instead of `body`.
          opts.data = opts.body;
          delete opts.body;
      }

      if (opts.toBase64) { // return base64, used when requesting images
          opts.buffer = 2;
          delete opts.toBase64;
      }
      if (opts.redirect === false) {
          opts.redirect = 0;
      }

      // Debug output only: show the parsed form body when one is being sent.
      const contentTypeKey = findHeaderKey(headers, 'content-type');
      const contentType = contentTypeKey ? String(headers[contentTypeKey] || '') : '';
      if (contentType.includes('application/x-www-form-urlencoded')) {
          log("custom body is application/x-www-form-urlencoded");
          if (typeof opts.body === 'string') {
              console.log(JSON.stringify(parseQueryString(opts.body)));
          }
      }
      console.log(JSON.stringify(opts.headers));
      console.log('request:' + url + ` |method:${opts.method || 'GET'} |body:${opts.body || ''}`);

      const res = await req(url, opts);
      const html = (res && res.content) || '';
      if (!opts.withHeaders) {
          return html;
      }
      // Copy instead of writing `body` into the response's own headers object.
      return JSON.stringify(Object.assign({}, res && res.headers, {body: html}));
  }

  // Alias kept as `var`: `fetch` is the alternative name under which `request` is exposed.
  var fetch = request;
  /**
   * Shortcut for a POST request.
   * The options are copied before `method` is forced to 'POST', so an options
   * object the caller reuses elsewhere is not silently turned into a POST.
   * @param url request URL
   * @param obj request options forwarded to `request` (optional)
   * @returns {Promise<string>} whatever `request` resolves to
   */
  async function post(url, obj) {
      const opts = Object.assign({}, obj, {method: 'POST'});
      return await request(url, opts);
  }
  /**
   * Fetches a URL and extracts the cookie the server sets, typically used to get
   * past a search verification step.
   * Usage: let {cookie, html} = await reqCookie(url);
   *
   * The caller's `obj` is copied, so `withHeaders` is not written into it.
   * @param url address that answers with a Set-Cookie header
   * @param obj regular request options (optional)
   * @param all_cookie when true the whole Set-Cookie value is returned (array values
   *                   joined with ';'); by default only the part before the first ';'
   *                   is returned
   * @returns {Promise<{cookie: string, html: *}>} the cookie ('' when none was set) and
   *          the `body` field of the response
   */
  async function reqCookie(url, obj, all_cookie) {
      const opts = Object.assign({}, obj, {withHeaders: true});
      all_cookie = all_cookie || false;
      const json = JSON.parse(await request(url, opts));
      // Header names are case-insensitive.
      const setCk = Object.keys(json).find(it => it.toLowerCase() === 'set-cookie');
      let cookie = setCk ? json[setCk] : '';
      if (Array.isArray(cookie)) {
          cookie = cookie.join(';');
      } else if (typeof cookie !== 'string') {
          // A null or otherwise non-string header value must not break the split below.
          cookie = cookie === null || cookie === undefined ? '' : String(cookie);
      }
      if (!all_cookie) {
          cookie = cookie.split(';')[0];
      }
      return {
          cookie,
          html: json.body
      };
  }
  /**
   * Detects a BT-panel (btwaf) verification redirect inside previously fetched html
   * and, when one is found, requests the page again with the btwaf token appended.
   *
   * Html without a quoted `?btwaf=...` link, and non-string input, is returned
   * unchanged instead of throwing.
   * @param html html obtained earlier
   * @param url url the html came from
   * @param obj request options used for the follow-up request
   * @returns {Promise<*>} the html of the follow-up request, or the input html
   */
  async function checkHtml(html, url, obj) {
      if (typeof html !== 'string') {
          return html;
      }
      // Capture "=<token>" up to the quote that closes the redirect link.
      const found = html.match(/\?btwaf(=[^"'\r\n]*)["']/);
      if (!found) {
          return html;
      }
      const base = String(url).split('#')[0];
      // Do not start a second query string when the url already carries one.
      const separator = base.includes('?') ? '&' : '?';
      url = base + separator + 'btwaf' + found[1];
      log('宝塔验证访问链接:' + url);
      return await request(url, obj);
  }
  /**
   * Fetches a page and runs the result through one btwaf verification pass.
   * @param url request URL
   * @param obj request options, handed unchanged to both `request` and `checkHtml`
   * @returns {Promise<*>} whatever `checkHtml` resolves to
   */
  async function getCode(url, obj) {
      const firstPass = await request(url, obj);
      return checkHtml(firstPass, url, obj);
  }
  /**
   * Request helper for source rules: sends rule.headers and injects the cookie
   * stored under RULE_CK (read through getItem) when there is one.
   *
   * rule.headers is copied first, so the stored cookie is never written back into
   * the rule definition. A stale cookie header is removed, whatever its casing,
   * before the fresh one is added, so two cookie headers are never sent together.
   * @param url request URL
   * @returns {Promise<*>} whatever `getCode` resolves to
   */
  async function getHtml(url) {
      const hadHeaders = Boolean(rule.headers);
      const headers = Object.assign({}, rule.headers);
      const cookie = getItem(RULE_CK, '');
      if (cookie) {
          const cookieKeys = Object.keys(headers).filter(it => it.toLowerCase() === 'cookie');
          const staleKeys = cookieKeys.filter(it => headers[it] !== cookie);
          if (!hadHeaders) {
              log('历史无headers,更新过验证后的含cookie的headers');
          } else if (cookieKeys.length === 0) {
              log('历史无cookie,新增过验证后的cookie');
          }
          staleKeys.forEach(it => {
              log(it === 'cookie'
                  ? '历史有小写过期的cookie,更新过验证后的cookie'
                  : '历史有大写过期的cookie,更新过验证后的cookie');
              delete headers[it];
          });
          // Only add the header when no existing entry already carries the fresh cookie.
          if (cookieKeys.length === staleKeys.length) {
              headers['Cookie'] = cookie;
          }
      }
      const obj = {};
      if (hadHeaders || Object.keys(headers).length > 0) {
          obj.headers = headers;
      }
      return await getCode(url, obj);
  }
  /**
   * Solves an image captcha through OCR and submits the result, retrying up to
   * OCR_RETRY times.
   *
   * Each attempt downloads `<host>/index.php/verify/index.html` as base64, passes the
   * image to OcrApi.classification and posts the recognised text to
   * `<host>/index.php/ajax/verify_check`. The session cookie is taken from the first
   * captcha response that sets one; the Set-Cookie value may be a string or an array.
   *
   * NOTE(review): later captcha downloads do not send the cookie captured earlier, so
   * the server may tie a retried image to a different session; confirm against the
   * target sites.
   * @param url any url of the target site; only its home (getHome) is used
   * @returns {Promise<string>} the cookie accepted by the verification endpoint,
   *          or '' when every attempt failed
   */
  async function verifyCode(url) {
      const host = getHome(url);
      const yzm_url = `${host}/index.php/verify/index.html`;
      let cookie = '';
      for (let cnt = 0; cnt < OCR_RETRY; cnt += 1) {
          try {
              console.log(`验证码链接:${yzm_url}`);
              const hhtml = await request(yzm_url, {withHeaders: true, toBase64: true}, true);
              const json = JSON.parse(hhtml);
              if (!cookie) {
                  const setCk = Object.keys(json).find(it => it.toLowerCase() === 'set-cookie');
                  const raw = setCk ? json[setCk] : '';
                  // Several Set-Cookie headers arrive as an array; use the first one.
                  const first = Array.isArray(raw) ? raw[0] : raw;
                  cookie = typeof first === 'string' ? first.split(';')[0] : '';
              }
              console.log('cookie:' + cookie);
              const code = await OcrApi.classification(json.body);
              console.log(`第${cnt + 1}次验证码识别结果:${code}`);
              // OCR output is arbitrary text, so it is encoded before it enters the query string.
              const submit_url = `${host}/index.php/ajax/verify_check?type=search&verify=${encodeURIComponent(code)}`;
              console.log(submit_url);
              const reply = JSON.parse(await request(submit_url, {headers: {Cookie: cookie}, 'method': 'POST'}));
              if (reply.msg === 'ok') {
                  console.log(`第${cnt + 1}次验证码提交成功`);
                  return cookie; // the caller needs the verified cookie
              }
          } catch (e) {
              console.log(`第${cnt + 1}次验证码提交失败:${e.message}`);
          }
      }
      // Every attempt failed: report "no cookie" rather than an unverified one.
      return '';
  }
  /**
   * Stores value `v` under key `k` for the current source by calling
   * local.set(RKEY, k, v), then logs what was written.
   * NOTE(review): the original note says the key is inserted or updated in a database
   * config table and its in-memory cache is cleared; that happens inside `local`, if
   * at all, and is not visible here.
   * @param k key
   * @param v value
   */
  function setItem(k, v) {
      const ruleKey = RKEY;
      local.set(ruleKey, k, v);
      const message = `规则${ruleKey}设置${k} => ${v}`;
      console.log(message);
  }
  /**
   * Reads the value stored under key `k` for the current source through
   * local.get(RKEY, k). The default `v` is returned whenever the stored value is
   * falsy, which includes a missing key.
   * NOTE(review): the original note mentions an in-memory cache after the first read;
   * any caching lives inside `local` and is not visible here.
   * @param k key
   * @param v default value
   * @returns {*} the stored value, or `v`
   */
  function getItem(k, v) {
      const stored = local.get(RKEY, k);
      if (stored) {
          return stored;
      }
      return v;
  }
  /**
   * Removes the entry stored under key `k` for the current source by calling
   * local.delete(RKEY, k).
   * NOTE(review): the original note says the key's in-memory cache is cleared as well;
   * that is up to `local` and is not visible here.
   * @param k key to remove
   */
  function clearItem(k) {
      const store = local;
      store.delete(RKEY, k);
  }
  // The jsp helper family was moved into the drpyS code, where it is injected again
  // after the rule and its preprocessing have run, so the helpers also work outside
  // the scope in which `rule` is defined. These assignments could be commented out;
  // they are not strictly needed.
  globalThis.jsp = new jsoup(rule.host || '');
  [
      ['pdfh', pdfh],
      ['pd', pd],
      ['pdfa', pdfa],
      ['setItem', setItem],
      ['getItem', getItem],
      ['clearItem', clearItem],
      ['request', request],
      ['fetch', fetch],
  ].forEach(function (entry) {
      // Publish each helper on the global object under its own name.
      globalThis[entry[0]] = entry[1];
  });