util.ym.js 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. import 'assets://js/lib/uri.min.js'
  2. import cheerio from 'assets://js/lib/cheerio.min.js';
  3. import 'assets://js/lib/crypto-js.js'
  // Alphabet used by the random-string helpers; its last ten characters are the digits 0-9.
  var charStr = 'abacdefghjklmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ0123456789';

  /**
   * Draw a random integer in the inclusive range [min, max].
   *
   * When `i` loosely equals 0, a draw that lands in the last ten positions of
   * `charStr` (the digits) is discarded and a new one is taken, so position 0
   * never receives a digit index.
   *
   * @param {number} min - Lowest index that may be returned.
   * @param {number} max - Highest index that may be returned.
   * @param {number} i - Position being filled; 0 triggers the digit rejection.
   * @returns {number} The selected index.
   */
  export function randIndex(min, max, i) {
    const span = max - min + 1;
    const candidate = Math.floor(Math.random() * span + min);
    const firstDigitAt = charStr.length - 10;
    // Loose equality is kept deliberately so callers passing '0' behave as before.
    const rejected = i == 0 && candidate >= firstDigitAt;
    return rejected ? randIndex(min, max, i) : candidate;
  }
  /**
   * Build a random string from the characters of `charStr`.
   *
   * Each position is filled by asking `randIndex` for an index between 0 and
   * the last index of `charStr`, passing the position so that `randIndex` can
   * apply its position-0 rule.
   *
   * @param {number} [len] - Desired length; any falsy value (including 0) means 15.
   * @returns {string} The generated string.
   */
  export function randomStr(len) {
    const total = len || 15;
    const lowest = 0;
    const highest = charStr.length - 1;
    let out = '';
    let pos = 0;
    while (pos < total) {
      out += charStr[randIndex(lowest, highest, pos)];
      pos += 1;
    }
    return out;
  }
  /**
   * Combine `url` with `base` into a single URL string.
   *
   * Both inputs default to '' when falsy, are trimmed, and have a trailing '/'
   * removed through the `rstrip` string helper (provided outside this file).
   * The target is then chosen as follows:
   *   - `http://` / `https://` prefix: `url` is used as is;
   *   - `://` prefix: the base protocol is prepended;
   *   - `//` prefix: the base protocol and ':' are prepended;
   *   - anything else: `url` is appended to the base protocol, host and
   *     optional port, separated by '/'.
   * Finally, a missing or blank path, and a missing query, are copied from the
   * base.
   *
   * @param {string} base - URL that supplies protocol/host/port and fallbacks.
   * @param {string} url - Absolute, protocol-relative or relative URL.
   * @returns {string} The string form of the resulting `Uri`.
   */
  export function urljoin(base, url) {
    const baseUri = new Uri((base || '').trim().rstrip('/'));
    const ref = (url || '').trim().rstrip('/');

    let target;
    if (ref.startsWith('http://') || ref.startsWith('https://')) {
      target = ref;
    } else if (ref.startsWith('://')) {
      target = baseUri.protocol() + ref;
    } else if (ref.startsWith('//')) {
      target = baseUri.protocol() + ':' + ref;
    } else {
      let origin = baseUri.protocol() + '://' + baseUri.host();
      const port = baseUri.port();
      if (port) {
        origin += ':' + port;
      }
      target = origin + '/' + ref;
    }

    const resolved = new Uri(target);

    // Inherit the base path only when the target has no usable path of its own.
    const ownPath = resolved.path();
    if (!ownPath || ownPath.trim().length === 0) {
      const basePath = baseUri.path();
      if (basePath) {
        resolved.path(basePath);
      }
    }

    // Likewise inherit the base query when the target has none.
    if (!resolved.query()) {
      const baseQuery = baseUri.query();
      if (baseQuery) {
        resolved.query(baseQuery);
      }
    }

    return resolved.toString();
  }
  // Matches option/rule strings that end in an attribute name whose value is a
  // URL; pdfh and pd use it to decide whether the extracted value needs joining.
  const DOM_CHECK_ATTR = /(url|src|href|data-original|data-src)$/;
  // Detects a selector segment that already carries a positional filter or an id.
  // Deliberately NOT global: these patterns are only used with .test(), and a
  // `g` regex keeps `lastIndex` between calls, which made consecutive tests
  // report false negatives.
  const SELECT_REGEX = /:eq|:lt|:gt|#/;
  // Same as SELECT_REGEX but without the id ('#') alternative; used by pdfa.
  const SELECT_REGEX_A = /:eq|:lt|:gt/;
  /**
   * Extract a single value (text, inner HTML, attribute or serialized node)
   * from HTML using a '&&'-separated rule.
   *
   * Rule format: `selector&&selector&&...&&option`. Every selector segment that
   * has no `:eq`/`:lt`/`:gt`/`#` gets `:eq(0)` appended, the segments are joined
   * with spaces, and the last segment is the option:
   *   - 'Text'  -> text content
   *   - 'Html'  -> inner HTML
   *   - other   -> attribute of that name
   * A rule without '&&' is used as a plain selector and the matched node is
   * serialized with toString().
   *
   * @param {string|{rr: Function, ele: *}} html - HTML source, or a node
   *   descriptor as produced by pdfa (`rr` is the query function, `ele` the
   *   element). Any `typeof 'object'` value is treated as a descriptor.
   * @param {string} parse - Extraction rule. For descriptors a leading
   *   'body&&' is dropped; if nothing but the option remains, the option is
   *   read from the descriptor's own element.
   * @param {string} [base_url] - When given, and the option ends in a URL-like
   *   attribute name (DOM_CHECK_ATTR), the value is cut to start at 'http' or
   *   joined with this base via urljoin.
   * @returns {*} '' for an empty rule; otherwise whatever the query object's
   *   text()/html()/attr()/toString() returned (possibly URL-normalized).
   */
  export function pdfh(html, parse, base_url) {
    if (!parse || !parse.trim()) {
      return '';
    }

    const eleFind = typeof html === 'object';
    let selector = parse;
    let option;

    if (eleFind && selector.startsWith('body&&')) {
      selector = selector.slice(6);
      if (!selector.includes('&&')) {
        option = selector.trim();
        // Sentinel meaning "use the descriptor's own element".
        selector = '*=*';
      }
    }

    if (selector.includes('&&')) {
      const parts = selector.split('&&');
      option = parts.pop();
      // String#search always scans from index 0 and leaves lastIndex alone, so
      // the check stays correct even though SELECT_REGEX may be a global regex
      // (RegExp#test on a global regex resumes from the previous match).
      selector = parts
        .map((part) => (part.search(SELECT_REGEX) === -1 ? `${part}:eq(0)` : part))
        .join(' ');
    }

    const $ = eleFind ? html.rr : cheerio.load(html);

    let ret;
    if (!eleFind) {
      ret = $(selector);
    } else if (selector === '*=*' || $(html.ele).is(selector)) {
      ret = html.ele;
    } else {
      ret = $(html.ele).find(selector);
    }

    if (!option) {
      return $(ret).toString();
    }

    let result;
    if (option === 'Text') {
      result = $(ret).text();
    } else if (option === 'Html') {
      result = $(ret).html();
    } else {
      result = $(ret).attr(option);
    }

    if (result && base_url && DOM_CHECK_ATTR.test(option)) {
      // Values such as "url(http://...)" are cut down to the URL itself;
      // everything else is resolved against the base URL.
      result = /http/.test(result)
        ? result.slice(result.indexOf('http'))
        : urljoin(base_url, result);
    }
    return result;
  }
  /**
   * Select a list of nodes from HTML using a '&&'-separated selector rule.
   *
   * Every segment except the last gets `:eq(0)` appended unless it already
   * contains `:eq`, `:lt` or `:gt`; the segments are then joined with spaces
   * and used as one selector.
   *
   * @param {string|{rr: Function, ele: *}} html - HTML source, or a node
   *   descriptor (`rr` is the query function, `ele` the element). Any
   *   `typeof 'object'` value is treated as a descriptor.
   * @param {string} parse - Selector rule.
   * @returns {Array<{rr: Function, ele: *}>} One descriptor per matched
   *   element; [] for an empty rule.
   */
  export function pdfa(html, parse) {
    if (!parse || !parse.trim()) {
      return [];
    }

    const eleFind = typeof html === 'object';
    let selector = parse;

    if (selector.includes('&&')) {
      const parts = selector.split('&&');
      const lastIdx = parts.length - 1;
      // String#search is stateless, unlike RegExp#test on a global regex, so a
      // match in one segment can no longer hide a match in the next one.
      selector = parts
        .map((part, idx) =>
          idx < lastIdx && part.search(SELECT_REGEX_A) === -1 ? `${part}:eq(0)` : part,
        )
        .join(' ');
    }

    const $ = eleFind ? html.rr : cheerio.load(html);

    let ret;
    if (!eleFind) {
      ret = $(selector);
    } else if ($(html.ele).is(selector)) {
      ret = html.ele;
    } else {
      ret = $(html.ele).find(selector);
    }

    const result = [];
    if (ret) {
      // `ret` may be the descriptor's own element (no .each of its own), so it
      // is always passed through $() before iterating, as pdfh does.
      $(ret).each((idx, ele) => {
        result.push({ rr: $, ele });
      });
    }
    return result;
  }
  /**
   * Default parser facade bundling the extraction helpers.
   *
   *   - pdfh / pdfa: the module-level functions of the same name.
   *   - pd(html, parse, uri): like pdfh, but when `parse` ends in a URL-like
   *     attribute name (DOM_CHECK_ATTR) the value is turned into an absolute
   *     URL.
   */
  const defaultParser = {
    pdfh: pdfh,
    pdfa: pdfa,
    /**
     * Extract a value and, for URL-like attributes, make it absolute.
     *
     * @param {*} html - Passed through to pdfh.
     * @param {string} parse - Extraction rule passed through to pdfh.
     * @param {string} [uri] - Base URL for relative values; when omitted or
     *   falsy the global MY_URL is used.
     * @returns {*} The extracted value; for URL-like attributes either the
     *   substring starting at 'http' or the result of urljoin.
     */
    pd(html, parse, uri) {
      // `pd` is also published as a bare global function, where `this` is not
      // the parser object; fall back to defaultParser in that case.
      const host = this && typeof this.pdfh === 'function' ? this : defaultParser;
      let ret = host.pdfh(html, parse);
      if (DOM_CHECK_ATTR.test(parse)) {
        if (/http/.test(ret)) {
          ret = ret.slice(ret.indexOf('http'));
        } else {
          // Honor the caller-supplied base; MY_URL is only read as a fallback.
          ret = urljoin(uri || MY_URL, ret);
        }
      }
      return ret;
    },
  };
  // Publish the helpers on the global object so scripts that do not import this
  // module can call them by bare name.
  globalThis.randIndex = randIndex;
  globalThis.randomStr = randomStr;
  globalThis.urljoin = urljoin;
  globalThis.joinUrl = urljoin; // alias of urljoin
  globalThis.defaultParser = defaultParser;
  globalThis.pdfa = defaultParser.pdfa;
  globalThis.pdfh = defaultParser.pdfh;
  // `pd` reaches its sibling through `this`, so it must stay bound to the
  // parser object; a bare `pd(...)` call would otherwise run with `this`
  // undefined in module (strict) code.
  globalThis.pd = defaultParser.pd.bind(defaultParser);