liquid-dict.js 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. /*******************************************************************************
  2. ηMatrix - a browser extension to black/white list requests.
  3. Copyright (C) 2014-2019 Raymond Hill
  4. Copyright (C) 2019-2022 Alessio Vanni
  5. This program is free software: you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation, either version 3 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see {http://www.gnu.org/licenses/}.
  15. Home: https://gitlab.com/vannilla/ematrix
  16. uMatrix Home: https://github.com/gorhill/uMatrix
  17. */
  18. 'use strict';
  19. /******************************************************************************/
  20. ηMatrix.LiquidDict = (function() {
  21. /******************************************************************************/
  22. var LiquidDict = function() {
  23. this.dict = {};
  24. this.count = 0;
  25. this.duplicateCount = 0;
  26. this.bucketCount = 0;
  27. this.frozenBucketCount = 0;
  28. // Somewhat arbitrary: I need to come up with hard data to know at which
  29. // point binary search is better than indexOf.
  30. this.cutoff = 500;
  31. };
  32. /******************************************************************************/
  33. var meltBucket = function(ldict, len, bucket) {
  34. ldict.frozenBucketCount -= 1;
  35. var map = {};
  36. if ( bucket.charAt(0) === ' ' ) {
  37. bucket.trim().split(' ').map(function(k) {
  38. map[k] = true;
  39. });
  40. } else {
  41. var offset = 0;
  42. while ( offset < bucket.length ) {
  43. map[bucket.substring(offset, len)] = true;
  44. offset += len;
  45. }
  46. }
  47. return map;
  48. };
  49. /******************************************************************************/
  50. var melt = function(ldict) {
  51. var buckets = ldict.dict;
  52. var bucket;
  53. for ( var key in buckets ) {
  54. bucket = buckets[key];
  55. if ( typeof bucket === 'string' ) {
  56. buckets[key] = meltBucket(ldict, key.charCodeAt(0) & 0xFF, bucket);
  57. }
  58. }
  59. };
  60. /******************************************************************************/
  61. var freezeBucket = function(ldict, bucket) {
  62. ldict.frozenBucketCount += 1;
  63. var words = Object.keys(bucket);
  64. var wordLen = words[0].length;
  65. if ( wordLen * words.length < ldict.cutoff ) {
  66. return ' ' + words.join(' ') + ' ';
  67. }
  68. return words.sort().join('');
  69. };
  70. /******************************************************************************/
  71. // How the key is derived dictates the number and size of buckets.
  72. //
  73. // http://jsperf.com/makekey-concat-vs-join/3
  74. //
  75. // Question: Why is using a prototyped function better than a standalone
  76. // helper function?
  77. LiquidDict.prototype.makeKey = function(word) {
  78. var len = word.length;
  79. if ( len > 255 ) {
  80. len = 255;
  81. }
  82. var i = len >> 2;
  83. return String.fromCharCode(
  84. (word.charCodeAt( 0) & 0x03) << 14 |
  85. (word.charCodeAt( i) & 0x03) << 12 |
  86. (word.charCodeAt( i+i) & 0x03) << 10 |
  87. (word.charCodeAt(i+i+i) & 0x03) << 8 |
  88. len
  89. );
  90. };
  91. /******************************************************************************/
  92. LiquidDict.prototype.test = function(word) {
  93. var key = this.makeKey(word);
  94. var bucket = this.dict[key];
  95. if ( bucket === undefined ) {
  96. return false;
  97. }
  98. if ( typeof bucket === 'object' ) {
  99. return bucket[word] !== undefined;
  100. }
  101. if ( bucket.charAt(0) === ' ' ) {
  102. return bucket.indexOf(' ' + word + ' ') >= 0;
  103. }
  104. // binary search
  105. var len = word.length;
  106. var left = 0;
  107. // http://jsperf.com/or-vs-floor/3
  108. var right = ~~(bucket.length / len + 0.5);
  109. var i, needle;
  110. while ( left < right ) {
  111. i = left + right >> 1;
  112. needle = bucket.substr( len * i, len );
  113. if ( word < needle ) {
  114. right = i;
  115. } else if ( word > needle ) {
  116. left = i + 1;
  117. } else {
  118. return true;
  119. }
  120. }
  121. return false;
  122. };
  123. /******************************************************************************/
  124. LiquidDict.prototype.add = function(word) {
  125. var key = this.makeKey(word);
  126. if ( key === undefined ) {
  127. return false;
  128. }
  129. var bucket = this.dict[key];
  130. if ( bucket === undefined ) {
  131. this.dict[key] = bucket = {};
  132. this.bucketCount += 1;
  133. bucket[word] = true;
  134. this.count += 1;
  135. return true;
  136. } else if ( typeof bucket === 'string' ) {
  137. this.dict[key] = bucket = meltBucket(this, word.len, bucket);
  138. }
  139. if ( bucket[word] === undefined ) {
  140. bucket[word] = true;
  141. this.count += 1;
  142. return true;
  143. }
  144. this.duplicateCount += 1;
  145. return false;
  146. };
  147. /******************************************************************************/
  148. LiquidDict.prototype.freeze = function() {
  149. var buckets = this.dict;
  150. var bucket;
  151. for ( var key in buckets ) {
  152. bucket = buckets[key];
  153. if ( typeof bucket === 'object' ) {
  154. buckets[key] = freezeBucket(this, bucket);
  155. }
  156. }
  157. };
  158. /******************************************************************************/
  159. LiquidDict.prototype.reset = function() {
  160. this.dict = {};
  161. this.count = 0;
  162. this.duplicateCount = 0;
  163. this.bucketCount = 0;
  164. this.frozenBucketCount = 0;
  165. };
  166. /******************************************************************************/
  167. return LiquidDict;
  168. /******************************************************************************/
  169. })();
  170. /******************************************************************************/
  171. ηMatrix.ubiquitousBlacklist = new ηMatrix.LiquidDict();
  172. ηMatrix.ubiquitousWhitelist = new ηMatrix.LiquidDict();