/* KNN_prune.cpp
 *
 * Copyright (C) 2007-2008 Ola Söder, 2010-2011,2015,2016,2017 Paul Boersma
 *
 * This code is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This code is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this work. If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * os 2007/05/29 Initial release
 * os 2009/01/23 Bugfix: Rejects classifiers containing 0 or 1 instances
 * pb 2010/06/06 removed some array-creations-on-the-stack
 * pb 2011/04/14 C++
 * pb 2011/04/16 removed memory leaks
 */

#include "KNN_prune.h"
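
/*
 * Overview: KNN_prune_prune thins a trained k-nearest-neighbour classifier in
 * two passes. The first pass removes instances judged noisy (KNN_prune_noisy);
 * the second collects instances that are superfluous but not critical, orders
 * them with KNN_prune_sort, and removes them in that order. The parameters n
 * and r give the probability with which a detected noisy or redundant
 * instance is actually removed.
 */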
/////////////////////////////////////////////////////////////////////////////////////////////
// Prune                                                                                     //
/////////////////////////////////////////////////////////////////////////////////////////////
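/*
 * Usage sketch (hypothetical; assumes a classifier `knn` of type autoKNN that
 * has already been trained elsewhere). With n = r = 1.0 every detected noisy
 * or redundant instance is removed; smaller values remove each candidate only
 * with that probability.
 *
 *     integer removed = KNN_prune_prune (knn.get(), 1.0, 1.0, 3);
 *     // `removed` now holds the number of instances that were pruned away
 */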
integer KNN_prune_prune
(
    KNN me,        // the classifier to be pruned
    double n,      // pruning degree: noise, 0 <= n <= 1
    double r,      // pruning degree: redundancy, 0 <= r <= 1
    integer k      // k(!)
)
{
    autoCategories uniqueCategories = Categories_selectUniqueItems (my output.get());
    if (uniqueCategories->size == my nInstances)
        return 0;

    integer removals = 0;
    integer ncandidates = 0;
    autoNUMvector <integer> candidates ((integer) 0, my nInstances - 1);
    if (my nInstances <= 1)
        return 0;

    for (integer y = 1; y <= my nInstances; y ++) {
        if (KNN_prune_noisy (my input.get(), my output.get(), y, k)) {
            if (n == 1 || NUMrandomUniform (0, 1) <= n) {
                KNN_removeInstance (me, y);
                ++ removals;
            }
        }
    }

    for (integer y = 1; y <= my nInstances; ++ y) {
        if (KNN_prune_superfluous (my input.get(), my output.get(), y, k, 0) && ! KNN_prune_critical (my input.get(), my output.get(), y, k))
            candidates [ncandidates ++] = y;
    }

    KNN_prune_sort (my input.get(), my output.get(), k, candidates.peek(), ncandidates);

    for (integer y = 0; y < ncandidates; ++ y) {
        if (KNN_prune_superfluous (my input.get(), my output.get(), candidates [y], k, 0) && ! KNN_prune_critical (my input.get(), my output.get(), candidates [y], k)) {
            if (r == 1.0 || NUMrandomUniform (0.0, 1.0) <= r) {
                KNN_removeInstance (me, candidates [y]);
                for (integer i = y + 1; i < ncandidates; ++ i) {
                    if (candidates [i] > candidates [y])
                        -- candidates [i];
                }
                ++ removals;
            }
        }
    }
    return removals;
}
/////////////////////////////////////////////////////////////////////////////////////////////
// sort indices according to pruning order defined by rule 2                                //
/////////////////////////////////////////////////////////////////////////////////////////////
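/*
 * Ordering produced below: candidates with more same-class members ("friends")
 * among their k nearest neighbours are moved to the front, so the most
 * redundant instances are pruned first; ties on the friend count are broken
 * via KNN_nearestEnemy, with a random coin flip as the final tie-breaker.
 */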
void KNN_prune_sort
(
    PatternList p,        // source
    Categories c,         // source
    integer k,            // k(!)
    integer * indices,    // indices of instances to be sorted
    integer nindices      // the number of instances to be sorted
)
{
    integer n = nindices;
    autoNUMvector <integer> h ((integer) 0, nindices - 1);
    for (integer cc = 0; cc < nindices; ++ cc)
        h [cc] = KNN_friendsAmongkNeighbours (p, p, c, indices [cc], k);
    while (-- n) {   // insertion-sort, is heap-sort worth the effort?
        for (integer m = n; m < nindices - 1; m ++) {
            if (h [m - 1] > h [m])
                break;
            if (h [m - 1] < h [m]) {
                OlaSWAP (integer, h [m - 1], h [m]);   // keep the cached friend counts aligned with their indices
                OlaSWAP (integer, indices [m - 1], indices [m]);
            } else {
                if (KNN_nearestEnemy (p, p, c, indices [m - 1]) < KNN_nearestEnemy (p, p, c, indices [m])) {
                    OlaSWAP (integer, indices [m - 1], indices [m]);
                } else {
                    if (NUMrandomUniform (0, 1) > 0.5) {
                        OlaSWAP (integer, indices [m - 1], indices [m]);
                    }
                }
            }
        }
    }
}
/////////////////////////////////////////////////////////////////////////////////////////////
// k-coverage                                                                                //
/////////////////////////////////////////////////////////////////////////////////////////////
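/*
 * The k-coverage of instance y is the set of same-class instances whose
 * k-nearest-neighbour lists contain y. The function writes those instance
 * indices into `indices` and returns how many there are.
 */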
integer KNN_prune_kCoverage
(
    PatternList p,       // source
    Categories c,        // source
    integer y,           // source instance index
    integer k,           // k(!)
    integer * indices    // out: the k-coverage set
)
{
    Melder_assert (y <= p -> ny);
    Melder_assert (k > 0 && k <= p -> ny);
    integer cc = 0;
    autoFeatureWeights fws = FeatureWeights_create (p -> nx);
    autoNUMvector <integer> tempindices ((integer) 0, p -> ny - 1);
    for (integer yy = 1; yy <= p -> ny; yy ++) {
        if (y != yy && FeatureWeights_areFriends (c->at [y], c->at [yy])) {
            integer n = KNN_kNeighboursSkip (p, p, fws.get(), yy, k, tempindices.peek(), y);
            while (n) {
                Melder_assert (n <= p -> ny);
                if (tempindices [-- n] == y) {
                    indices [cc ++] = yy;
                    break;
                }
            }
        }
    }
    return cc;
}
/////////////////////////////////////////////////////////////////////////////////////////////
// testing for superfluousness                                                               //
/////////////////////////////////////////////////////////////////////////////////////////////
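/*
 * An instance y is "superfluous" if its own class matches the most frequent
 * class among its k nearest neighbours (optionally computed with instance
 * `skipper` excluded from the search), i.e. if its neighbourhood would still
 * classify it correctly.
 */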
int KNN_prune_superfluous
(
    PatternList p,      // source
    Categories c,       // source
    integer y,          // source instance index
    integer k,          // k(!)
    integer skipper     // instance to skip in the neighbour search
)
{
    if (y > p -> ny) y = p -> ny;   // safety belt
    if (k > p -> ny) k = p -> ny;
    autoFeatureWeights fws = FeatureWeights_create (p -> nx);
    autoNUMvector <integer> indices ((integer) 0, k - 1);
    autoNUMvector <integer> freqindices ((integer) 0, k - 1);
    autoNUMvector <double> distances ((integer) 0, k - 1);
    autoNUMvector <double> freqs ((integer) 0, k - 1);
    if (! KNN_kNeighboursSkip (p, p, fws.get(), y, k, indices.peek(), skipper))
        return 0;
    integer ncategories = KNN_kIndicesToFrequenciesAndDistances (c, k, indices.peek(), distances.peek(), freqs.peek(), freqindices.peek());
    int result = FeatureWeights_areFriends (c->at [y], c->at [freqindices [KNN_max (freqs.peek(), ncategories)]]);
    if (result)
        return 1;
    return 0;
}
/////////////////////////////////////////////////////////////////////////////////////////////
// testing for criticalness                                                                  //
/////////////////////////////////////////////////////////////////////////////////////////////
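/*
 * An instance y is "critical" if at least one of its k nearest neighbours is
 * not superfluous, either with the full instance base or once y is excluded
 * from the neighbour search; removing such a y could cost a neighbour its
 * correct classification.
 */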
int KNN_prune_critical
(
    PatternList p,    // source
    Categories c,     // source
    integer y,        // source instance index
    integer k         // k(!)
)
{
    if (y > p -> ny) y = p -> ny;   // safety belt
    if (k > p -> ny) k = p -> ny;
    autoFeatureWeights fws = FeatureWeights_create (p -> nx);
    autoNUMvector <integer> indices ((integer) 0, k - 1);
    integer ncollected = KNN_kNeighboursSkip (p, p, fws.get(), y, k, indices.peek(), y);
    for (integer ic = 0; ic < ncollected; ic ++) {
        if (! KNN_prune_superfluous (p, c, indices [ic], k, 0) || ! KNN_prune_superfluous (p, c, indices [ic], k, y)) {
            return 1;
        }
    }
    return 0;
}
/////////////////////////////////////////////////////////////////////////////////////////////
// testing for noisiness                                                                     //
/////////////////////////////////////////////////////////////////////////////////////////////
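/*
 * An instance y is considered "noisy" if it is not superfluous (its own
 * neighbourhood misclassifies it) and its reachability (the number of
 * neighbours found for y) exceeds its k-coverage (the number of same-class
 * instances that count y among their k nearest neighbours).
 */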
int KNN_prune_noisy
(
    PatternList p,    // source
    Categories c,     // source
    integer y,        // source instance index
    integer k         // k(!)
)
{
    if (y > p -> ny) y = p -> ny;   // safety belt
    if (k > p -> ny) k = p -> ny;
    autoFeatureWeights fws = FeatureWeights_create (p -> nx);
    autoNUMvector <integer> indices ((integer) 0, p -> ny - 1);   // the coverage is not bounded by k but by n
    integer reachability = KNN_kNeighboursSkip (p, p, fws.get(), y, k, indices.peek(), y);
    integer coverage = KNN_prune_kCoverage (p, c, y, k, indices.peek());
    if (! KNN_prune_superfluous (p, c, y, k, 0) && reachability > coverage)
        return 1;
    return 0;
}
/* End of file KNN_prune.cpp */