/*
 * language_data.js
 * ~~~~~~~~~~~~~~~~
 *
 * This script contains the language-specific data used by searchtools.js,
 * namely the list of stopwords, stemmer, scorer and splitter.
 *
 * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
 * :license: BSD, see LICENSE for details.
 *
 */
  12. var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
/* Non-minified version is copied as a separate JS file, if available */
  14. /**
  15. * Porter Stemmer
  16. */
  17. var Stemmer = function() {
  18. var step2list = {
  19. ational: 'ate',
  20. tional: 'tion',
  21. enci: 'ence',
  22. anci: 'ance',
  23. izer: 'ize',
  24. bli: 'ble',
  25. alli: 'al',
  26. entli: 'ent',
  27. eli: 'e',
  28. ousli: 'ous',
  29. ization: 'ize',
  30. ation: 'ate',
  31. ator: 'ate',
  32. alism: 'al',
  33. iveness: 'ive',
  34. fulness: 'ful',
  35. ousness: 'ous',
  36. aliti: 'al',
  37. iviti: 'ive',
  38. biliti: 'ble',
  39. logi: 'log'
  40. };
  41. var step3list = {
  42. icate: 'ic',
  43. ative: '',
  44. alize: 'al',
  45. iciti: 'ic',
  46. ical: 'ic',
  47. ful: '',
  48. ness: ''
  49. };
  50. var c = "[^aeiou]"; // consonant
  51. var v = "[aeiouy]"; // vowel
  52. var C = c + "[^aeiouy]*"; // consonant sequence
  53. var V = v + "[aeiou]*"; // vowel sequence
  54. var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
  55. var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
  56. var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
  57. var s_v = "^(" + C + ")?" + v; // vowel in stem
  58. this.stemWord = function (w) {
  59. var stem;
  60. var suffix;
  61. var firstch;
  62. var origword = w;
  63. if (w.length < 3)
  64. return w;
  65. var re;
  66. var re2;
  67. var re3;
  68. var re4;
  69. firstch = w.substr(0,1);
  70. if (firstch == "y")
  71. w = firstch.toUpperCase() + w.substr(1);
  72. // Step 1a
  73. re = /^(.+?)(ss|i)es$/;
  74. re2 = /^(.+?)([^s])s$/;
  75. if (re.test(w))
  76. w = w.replace(re,"$1$2");
  77. else if (re2.test(w))
  78. w = w.replace(re2,"$1$2");
  79. // Step 1b
  80. re = /^(.+?)eed$/;
  81. re2 = /^(.+?)(ed|ing)$/;
  82. if (re.test(w)) {
  83. var fp = re.exec(w);
  84. re = new RegExp(mgr0);
  85. if (re.test(fp[1])) {
  86. re = /.$/;
  87. w = w.replace(re,"");
  88. }
  89. }
  90. else if (re2.test(w)) {
  91. var fp = re2.exec(w);
  92. stem = fp[1];
  93. re2 = new RegExp(s_v);
  94. if (re2.test(stem)) {
  95. w = stem;
  96. re2 = /(at|bl|iz)$/;
  97. re3 = new RegExp("([^aeiouylsz])\\1$");
  98. re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
  99. if (re2.test(w))
  100. w = w + "e";
  101. else if (re3.test(w)) {
  102. re = /.$/;
  103. w = w.replace(re,"");
  104. }
  105. else if (re4.test(w))
  106. w = w + "e";
  107. }
  108. }
  109. // Step 1c
  110. re = /^(.+?)y$/;
  111. if (re.test(w)) {
  112. var fp = re.exec(w);
  113. stem = fp[1];
  114. re = new RegExp(s_v);
  115. if (re.test(stem))
  116. w = stem + "i";
  117. }
  118. // Step 2
  119. re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  120. if (re.test(w)) {
  121. var fp = re.exec(w);
  122. stem = fp[1];
  123. suffix = fp[2];
  124. re = new RegExp(mgr0);
  125. if (re.test(stem))
  126. w = stem + step2list[suffix];
  127. }
  128. // Step 3
  129. re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  130. if (re.test(w)) {
  131. var fp = re.exec(w);
  132. stem = fp[1];
  133. suffix = fp[2];
  134. re = new RegExp(mgr0);
  135. if (re.test(stem))
  136. w = stem + step3list[suffix];
  137. }
  138. // Step 4
  139. re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  140. re2 = /^(.+?)(s|t)(ion)$/;
  141. if (re.test(w)) {
  142. var fp = re.exec(w);
  143. stem = fp[1];
  144. re = new RegExp(mgr1);
  145. if (re.test(stem))
  146. w = stem;
  147. }
  148. else if (re2.test(w)) {
  149. var fp = re2.exec(w);
  150. stem = fp[1] + fp[2];
  151. re2 = new RegExp(mgr1);
  152. if (re2.test(stem))
  153. w = stem;
  154. }
  155. // Step 5
  156. re = /^(.+?)e$/;
  157. if (re.test(w)) {
  158. var fp = re.exec(w);
  159. stem = fp[1];
  160. re = new RegExp(mgr1);
  161. re2 = new RegExp(meq1);
  162. re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
  163. if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
  164. w = stem;
  165. }
  166. re = /ll$/;
  167. re2 = new RegExp(mgr1);
  168. if (re.test(w) && re2.test(w)) {
  169. re = /.$/;
  170. w = w.replace(re,"");
  171. }
  172. // and turn initial Y back to y
  173. if (firstch == "y")
  174. w = firstch.toLowerCase() + w.substr(1);
  175. return w;
  176. }
  177. }