Pattern_to_Categories_cluster.cpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. /* Pattern_to_Categories_cluster.cpp
  2. *
  3. * Copyright (C) 2007-2008 Ola So"der, 2010-2011,2016,2017 Paul Boersma
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. * os 20070529 initial release
  20. * pb 20100606 removed some array-creations-on-the-stack
  21. * pb 20100606 corrected some arrary-index-out-of-bounds errors
  22. * pb 20100606 corrected some memory leaks
  23. * pb 2011/03/08 tried to repair some of the header file chaos (several procedures here should be in KNN.c instead)
  24. * pb 2011/04/13 C++
  25. */
  26. #include "Pattern_to_Categories_cluster.h"
  27. #include "KNN.h" // BUG
  28. /////////////////////////////////////////////////////////////////////////////////////////////
  29. // Pattern_to_Categories_cluster //
  30. /////////////////////////////////////////////////////////////////////////////////////////////
  31. autoCategories PatternList_to_Categories_cluster
  32. (
  33. ///////////////////////////////
  34. // Parameters //
  35. ///////////////////////////////
  36. PatternList p, // source
  37. //
  38. FeatureWeights fws, // feature weights
  39. //
  40. integer k, // k(!)
  41. //
  42. double s, // clustersize constraint 0 < s <= 1
  43. //
  44. integer m // reseed maximum
  45. //
  46. )
  47. {
  48. autoCategories categories = Categories_createWithSequentialNumbers (k);
  49. if (k == p->ny)
  50. return categories;
  51. autoKNN knn = KNN_create();
  52. if (p -> ny % k)
  53. if (s > (double) (p -> ny / k) / (double) (p -> ny / k + 1)) // FIXME check whether integer division is correct
  54. s = (double) (p -> ny / k) / (double) (p -> ny / k + 1);
  55. double progress = m;
  56. autoNUMvector <double> sizes ((integer) 0, k);
  57. autoNUMvector <integer> seeds ((integer) 0, k);
  58. autoPatternList centroids = PatternList_create (k, p -> nx);
  59. autoNUMvector <double> beta ((integer) 0, centroids -> nx);
  60. do
  61. {
  62. double delta;
  63. integer nfriends = 0;
  64. Melder_progress (1 - (progress - m) / progress, U"");
  65. for (integer y = 1; y <= centroids->ny; y++)
  66. {
  67. int ifriend = 1;
  68. integer ys = Melder_iround (NUMrandomUniform (1, p -> ny)); // BUG probably wrong (the edges have half-probability)
  69. if (nfriends)
  70. {
  71. while (ifriend)
  72. {
  73. ys = Melder_iround (NUMrandomUniform (1, p->ny)); // BUG probably wrong (the edges have half-probability)
  74. for (integer fc = 0; fc < nfriends; fc++)
  75. {
  76. ifriend = 0;
  77. Melder_assert (fc < k);
  78. if (seeds [fc] == ys)
  79. {
  80. ifriend = 1;
  81. break;
  82. }
  83. }
  84. }
  85. }
  86. Melder_assert (nfriends <= k);
  87. seeds [nfriends++] = ys;
  88. for (integer x = 1; x <= centroids->nx; x++)
  89. centroids->z[y][x] = p->z[ys][x];
  90. }
  91. do
  92. {
  93. delta = 0;
  94. KNN_learn (knn.get(), centroids.get(), categories.get(), kOla_REPLACE, kOla_SEQUENTIAL);
  95. autoCategories interim = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);
  96. for (integer x = 1; x <= k; x ++)
  97. sizes [x] = 0;
  98. for (integer yp = 1; yp <= categories->size; yp ++)
  99. {
  100. double alfa = 1;
  101. Melder_assert (yp <= centroids -> ny);
  102. for (integer x = 1; x <= centroids -> nx; x ++)
  103. {
  104. beta [x] = centroids -> z [yp] [x];
  105. }
  106. for (integer ys = 1; ys <= interim->size; ys ++)
  107. {
  108. if (FeatureWeights_areFriends (categories->at [yp], interim->at [ys]))
  109. {
  110. for (integer x = 1; x <= p -> nx; x ++)
  111. {
  112. Melder_assert (ys <= p -> ny);
  113. if (alfa == 1)
  114. {
  115. centroids -> z [yp] [x] = p -> z [ys] [x];
  116. }
  117. else
  118. {
  119. centroids -> z [yp] [x] += (p -> z [ys] [x] - centroids -> z [yp] [x]) / alfa;
  120. }
  121. }
  122. Melder_assert (yp <= k);
  123. sizes [yp] ++;
  124. alfa ++;
  125. }
  126. }
  127. for (integer x = 1; x <= centroids -> nx; x ++)
  128. {
  129. delta += fabs (beta [x] - centroids -> z [yp] [x]);
  130. }
  131. }
  132. }
  133. while (delta != 0.0);
  134. double smax = sizes [1];
  135. double smin = sizes [1];
  136. for (integer x = 1; x <= k; x++)
  137. {
  138. if (smax < sizes [x]) smax = sizes [x];
  139. if (smin > sizes [x]) smin = sizes [x];
  140. }
  141. sizes [0] = smin / smax;
  142. -- m;
  143. }
  144. while (sizes[0] < s && m > 0);
  145. autoCategories output = KNN_classifyToCategories (knn.get(), p, fws, 1, kOla_FLAT_VOTING);
  146. return output;
  147. }
  148. /* End of file Pattern_to_Categories_cluster.cpp */