Discriminant.cpp 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771
  1. /* Discriminant.cpp
  2. *
  3. * Copyright (C) 1993-2018 David Weenink
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. djmw 20011016 removed some causes for compiler warnings
  20. djmw 20020313 removed obsolete TableOfReal_sortByLabels method
  21. djmw 20020314 +Discriminant_extractWithinGroupSSCP,
  22. +Discriminant_extractGroupLabels, +Discriminant_setGroupLabels.
  23. djmw 20020327 modified Discriminant_TableOfReal_to_Configuration
  24. djmw 20020418 Removed some causes for compiler warnings
  25. djmw 20020502 modified call Eigen_TableOfReal_project_into
  26. djmw 20030801 Discriminant_drawConcentrationEllipses extra argument
  27. djmw 20050405 Modified column label: eigenvector->Eigenvector
  28. djmw 20061212 Changed info to Melder_writeLine<x> format.
  29. djmw 20071009 wchar
  30. djmw 20071012 Added: o_CAN_WRITE_AS_ENCODING.h
  31. djmw 20071201 Melder_warning<n>
  32. djmw 20081119 Check in TableOfReal_to_Discriminant if TableOfReal_areAllCellsDefined
  33. djmw 20100107 +Discriminant_TableOfReal_mahalanobis
  34. djmw 20110304 Thing_new
  35. */
  36. #include "Discriminant.h"
  37. #include "SSCP.h"
  38. #include "Eigen_and_SSCP.h"
  39. #include "Eigen_and_TableOfReal.h"
  40. #include "SVD.h"
  41. #include "NUM2.h"
  42. #include "TableOfReal_extensions.h"
  43. #include "oo_DESTROY.h"
  44. #include "Discriminant_def.h"
  45. #include "oo_COPY.h"
  46. #include "Discriminant_def.h"
  47. #include "oo_EQUAL.h"
  48. #include "Discriminant_def.h"
  49. #include "oo_CAN_WRITE_AS_ENCODING.h"
  50. #include "Discriminant_def.h"
  51. #include "oo_WRITE_TEXT.h"
  52. #include "Discriminant_def.h"
  53. #include "oo_READ_TEXT.h"
  54. #include "Discriminant_def.h"
  55. #include "oo_WRITE_BINARY.h"
  56. #include "Discriminant_def.h"
  57. #include "oo_READ_BINARY.h"
  58. #include "Discriminant_def.h"
  59. #include "oo_DESCRIPTION.h"
  60. #include "Discriminant_def.h"
  61. Thing_implement (Discriminant, Daata, 1);
  62. void structDiscriminant :: v_info () {
  63. structDaata :: v_info ();
  64. MelderInfo_writeLine (U"Number of groups: ", numberOfGroups);
  65. MelderInfo_writeLine (U"Number of eigenvalues: ", eigen -> numberOfEigenvalues);
  66. MelderInfo_writeLine (U"Dimension of eigenvector: ", eigen -> dimension);
  67. MelderInfo_writeLine (U"Number of discriminant functions: ", Discriminant_getNumberOfFunctions (this));
  68. MelderInfo_writeLine (U"Number of observations (total): ", Discriminant_getNumberOfObservations (this, 0));
  69. }
  70. autoDiscriminant Discriminant_create (integer numberOfGroups, integer numberOfEigenvalues, integer dimension) {
  71. try {
  72. autoDiscriminant me = Thing_new (Discriminant);
  73. my eigen = Eigen_create (numberOfEigenvalues, dimension);
  74. my numberOfGroups = numberOfGroups;
  75. my groups = SSCPList_create ();
  76. my total = SSCP_create (dimension);
  77. my aprioriProbabilities = VECraw (numberOfGroups);
  78. my costs = MATraw (numberOfGroups, numberOfGroups);
  79. return me;
  80. } catch (MelderError) {
  81. Melder_throw (U"Discriminant not created.");
  82. }
  83. }
  84. integer Discriminant_groupLabelToIndex (Discriminant me, conststring32 label) {
  85. for (integer i = 1; i <= my numberOfGroups; i ++) {
  86. conststring32 name = Thing_getName (my groups -> at [i]);
  87. if (name && str32equ (name, label))
  88. return i;
  89. }
  90. return 0;
  91. }
  92. integer Discriminant_getNumberOfGroups (Discriminant me) {
  93. return my numberOfGroups;
  94. }
  95. integer Discriminant_getNumberOfObservations (Discriminant me, integer group) {
  96. if (group == 0) {
  97. return Melder_ifloor (my total -> numberOfObservations);
  98. } else if (group >= 1 && group <= my numberOfGroups) {
  99. SSCP sscp = my groups->at [group];
  100. return Melder_ifloor (sscp -> numberOfObservations);
  101. } else {
  102. return -1;
  103. }
  104. }
  105. void Discriminant_setAprioriProbability (Discriminant me, integer group, double p) {
  106. Melder_require (group > 0 && group <= my numberOfGroups, U"The group number (", group, U") should be in the interval [1, ", my numberOfGroups, U"]; the supplied value (", group, U") falls outside it.");
  107. Melder_require (p >= 0.0 && p <= 1.0, U"The probability should be in the interval [0, 1]");
  108. my aprioriProbabilities [group] = p;
  109. }
  110. integer Discriminant_getNumberOfFunctions (Discriminant me) {
  111. integer numberOfFunctions = std::min (my numberOfGroups - 1, my eigen -> dimension);
  112. numberOfFunctions = std::min (numberOfFunctions, my eigen -> numberOfEigenvalues);
  113. return numberOfFunctions;
  114. }
  115. void Discriminant_setGroupLabels (Discriminant me, Strings thee) {
  116. Melder_require (my numberOfGroups == thy numberOfStrings,
  117. U"The number of strings should equal the number of groups.");
  118. for (integer i = 1; i <= my numberOfGroups; i ++) {
  119. conststring32 name = thy strings [i].get();
  120. Thing_setName (my groups->at [i], name ? name : U"");
  121. }
  122. }
  123. autoStrings Discriminant_extractGroupLabels (Discriminant me) {
  124. try {
  125. autoStrings thee = Thing_new (Strings);
  126. thy strings = autostring32vector (my numberOfGroups);
  127. thy numberOfStrings = my numberOfGroups;
  128. for (integer i = 1; i <= my numberOfGroups; i ++) {
  129. conststring32 name = Thing_getName (my groups->at [i]);
  130. thy strings [i] = Melder_dup (name);
  131. }
  132. return thee;
  133. } catch (MelderError) {
  134. Melder_throw (me, U": group labels not extracted.");
  135. }
  136. }
  137. autoTableOfReal Discriminant_extractGroupCentroids (Discriminant me) {
  138. try {
  139. integer m = my groups -> size, n = my eigen -> dimension;
  140. autoTableOfReal thee = TableOfReal_create (m, n);
  141. for (integer i = 1; i <= m; i ++) {
  142. SSCP sscp = my groups->at [i];
  143. TableOfReal_setRowLabel (thee.get(), i, Thing_getName (sscp));
  144. VECcopy_preallocated ( thy data.row(i), sscp -> centroid.get());
  145. }
  146. thy columnLabels. copyElementsFrom_upTo (my groups->at [m] -> columnLabels.get(), n);
  147. // ppgb FIXME: that other number of columns could also be n, but that is not documented; if so, add an assert above
  148. return thee;
  149. } catch (MelderError) {
  150. Melder_throw (me, U": group centroids not extracted.");
  151. }
  152. }
  153. autoTableOfReal Discriminant_extractGroupStandardDeviations (Discriminant me) {
  154. try {
  155. integer m = my groups->size, n = my eigen -> dimension;
  156. autoTableOfReal thee = TableOfReal_create (m, n);
  157. for (integer i = 1; i <= m; i ++) {
  158. SSCP sscp = my groups->at [i];
  159. TableOfReal_setRowLabel (thee.get(), i, Thing_getName (sscp));
  160. integer numberOfObservationsm1 = Melder_ifloor (sscp -> numberOfObservations) - 1;
  161. for (integer j = 1; j <= n; j ++) {
  162. thy data [i] [j] = ( numberOfObservationsm1 > 0 ? sqrt (sscp -> data [j] [j] / numberOfObservationsm1) : undefined );
  163. }
  164. }
  165. thy columnLabels. copyElementsFrom_upTo (my groups->at [m] -> columnLabels.get(), n);
  166. // ppgb FIXME: that other number of columns could also be n, but that is not documented; if so, add an assert above
  167. return thee;
  168. } catch (MelderError) {
  169. Melder_throw (me, U": group standard deviations not extracted.");
  170. }
  171. }
  172. double Discriminant_getWilksLambda (Discriminant me, integer from) {
  173. integer numberOfFunctions = Discriminant_getNumberOfFunctions (me);
  174. if (from >= numberOfFunctions) {
  175. return 1;
  176. }
  177. if (from < 1) {
  178. from = 1;
  179. }
  180. return NUMwilksLambda (my eigen -> eigenvalues.at, 1 + from, numberOfFunctions);
  181. }
  182. /*
  183. raw r [j]: eigenvec [i] [j]
  184. unstandardized u [j]: sqrt(N-g) * r [j]
  185. standardized s [j]: u [j] sqrt (w [i] [i] / (N-g))
  186. */
  187. autoTableOfReal Discriminant_extractCoefficients (Discriminant me, int choice) {
  188. try {
  189. bool raw = choice == 0, standardized = choice == 2;
  190. integer nx = my eigen -> dimension, ny = my eigen -> numberOfEigenvalues;
  191. SSCP total = my total.get();
  192. autoTableOfReal thee = TableOfReal_create (ny, nx + 1);
  193. thy columnLabels. copyElementsFrom_upTo (my total -> columnLabels.get(), nx);
  194. // ppgb FIXME: that other number of columns should be at least nx (is it nx?), but that is not documented; if so, add an assert above
  195. autoSSCP within;
  196. if (standardized) {
  197. within = Discriminant_extractPooledWithinGroupsSSCP (me);
  198. }
  199. TableOfReal_setColumnLabel (thee.get(), nx + 1, U"constant");
  200. TableOfReal_setSequentialRowLabels (thee.get(), 1, ny, U"function_", 1, 1);
  201. double scale = sqrt (total -> numberOfObservations - my numberOfGroups);
  202. double *centroid = my total -> centroid.at;
  203. for (integer i = 1; i <= ny; i ++) {
  204. longdouble u0 = 0.0;
  205. for (integer j = 1; j <= nx; j ++) {
  206. if (standardized) {
  207. scale = sqrt (within -> data [j] [j]);
  208. }
  209. double ui = scale * my eigen -> eigenvectors [i] [j];
  210. thy data [i] [j] = ui;
  211. u0 += ui * centroid [j];
  212. }
  213. thy data [i] [nx + 1] = ( raw ? 0.0 : - (double) u0 );
  214. }
  215. return thee;
  216. } catch (MelderError) {
  217. Melder_throw (me, U": coefficients not extracted.");
  218. }
  219. }
  220. static double Discriminant_getDegreesOfFreedom (Discriminant me) {
  221. double ndf = 0.0;
  222. for (integer i = 1; i <= my groups->size; i ++) {
  223. ndf += SSCP_getDegreesOfFreedom (my groups->at [i]);
  224. }
  225. return ndf;
  226. }
  227. void Discriminant_getPartialDiscriminationProbability (Discriminant me, integer numberOfWantedDimensions, double *out_prob, double *out_chisq, double *out_df)
  228. {
  229. integer eigendimension = my eigen -> dimension, numberOfGroups = my numberOfGroups;
  230. integer numberOfFunctions = Discriminant_getNumberOfFunctions (me);
  231. double degreesOfFreedom = Discriminant_getDegreesOfFreedom (me);
  232. double prob = undefined, chisq = undefined, df = undefined;
  233. if (numberOfWantedDimensions < numberOfFunctions) {
  234. double lambda = NUMwilksLambda (my eigen -> eigenvalues.at, numberOfWantedDimensions + 1, numberOfFunctions);
  235. if (lambda != 1.0) {
  236. chisq = - (degreesOfFreedom + (numberOfGroups - eigendimension) / 2.0 - 1.0) * log (lambda);
  237. df = (eigendimension - numberOfWantedDimensions) * (numberOfGroups - numberOfWantedDimensions - 1);
  238. if (out_prob) {
  239. prob = NUMchiSquareQ (chisq, df);
  240. }
  241. }
  242. }
  243. if (out_prob) {
  244. *out_prob = prob;
  245. }
  246. if (out_chisq) {
  247. *out_chisq = chisq;
  248. }
  249. if (out_df) {
  250. *out_df = df;
  251. }
  252. }
  253. double Discriminant_getConcentrationEllipseArea (Discriminant me, integer group,
  254. double scale, bool confidence, bool discriminantDirections, integer d1, integer d2)
  255. {
  256. double area = undefined;
  257. if (group < 1 || group > my numberOfGroups) {
  258. return area;
  259. }
  260. if (discriminantDirections) {
  261. autoSSCP thee = Eigen_SSCP_project (my eigen.get(), my groups->at [group]);
  262. area = SSCP_getConcentrationEllipseArea (thee.get(), scale, confidence, d1, d2);
  263. } else {
  264. area = SSCP_getConcentrationEllipseArea (my groups->at [group], scale, confidence, d1, d2);
  265. }
  266. return area;
  267. }
  268. double Discriminant_getLnDeterminant_group (Discriminant me, integer group) {
  269. if (group < 1 || group > my numberOfGroups) {
  270. return undefined;
  271. }
  272. autoCovariance c = SSCP_to_Covariance (my groups->at [group], 1);
  273. double ln_d = SSCP_getLnDeterminant (c.get());
  274. return ln_d;
  275. }
  276. double Discriminant_getLnDeterminant_total (Discriminant me) {
  277. autoCovariance c = SSCP_to_Covariance (my total.get(), 1);
  278. double ln_d = SSCP_getLnDeterminant (c.get());
  279. return ln_d;
  280. }
  281. autoSSCP Discriminant_extractPooledWithinGroupsSSCP (Discriminant me) {
  282. return SSCPList_to_SSCP_pool (my groups.get());
  283. }
  284. autoSSCP Discriminant_extractWithinGroupSSCP (Discriminant me, integer index) {
  285. try {
  286. Melder_require (index > 0 && index <= my numberOfGroups, U"Index should be in interval [1,", my numberOfGroups, U"].");
  287. autoSSCP thee = Data_copy (my groups->at [index]);
  288. return thee;
  289. } catch (MelderError) {
  290. Melder_throw (me, U": within group SSCP not created.");
  291. }
  292. }
  293. autoSSCP Discriminant_extractBetweenGroupsSSCP (Discriminant me) {
  294. try {
  295. integer n = my total -> numberOfRows;
  296. autoSSCP b = Data_copy (my total.get());
  297. autoSSCP w = SSCPList_to_SSCP_pool (my groups.get());
  298. for (integer i = 1; i <= n; i ++) {
  299. for (integer j = i; j <= n; j ++) {
  300. b -> data [j] [i] = (b -> data [i] [j] -= w -> data [i] [j]);
  301. }
  302. }
  303. return b;
  304. } catch (MelderError) {
  305. Melder_throw (me, U": between group SSCP not created.");
  306. }
  307. }
  308. void Discriminant_drawTerritorialMap (Discriminant me, Graphics g, bool discriminantDirections, integer d1, integer d2, double xmin, double xmax, double ymin, double ymax, int fontSize, bool poolCovarianceMatrices, bool garnish) {
  309. (void) me; (void) g; (void) discriminantDirections; (void) d1; (void) d2;
  310. (void) xmin; (void) xmax; (void) ymin;
  311. (void) ymax; (void) fontSize; (void) poolCovarianceMatrices; (void) garnish;
  312. }
  313. void Discriminant_drawConcentrationEllipses (Discriminant me, Graphics g, double scale, bool confidence, conststring32 label, bool discriminantDirections, integer d1, integer d2, double xmin, double xmax, double ymin, double ymax, int fontSize, bool garnish)
  314. {
  315. integer numberOfFunctions = Discriminant_getNumberOfFunctions (me);
  316. if (! discriminantDirections) {
  317. SSCPList_drawConcentrationEllipses (my groups.get(), g, scale, confidence, label, d1, d2, xmin, xmax, ymin, ymax, fontSize, garnish);
  318. return;
  319. }
  320. if (numberOfFunctions <= 1) {
  321. Melder_warning (U"Discriminant_drawConcentrationEllipses: Nothing drawn because there is only one dimension in the discriminant space.");
  322. return;
  323. }
  324. // Project SSCPs on eigenvectors.
  325. if (d1 == 0 && d2 == 0) {
  326. d1 = 1;
  327. d2 = std::min (numberOfFunctions, d1 + 1);
  328. } else if (d1 < 0 || d2 > numberOfFunctions) {
  329. return;
  330. }
  331. autoSSCPList thee = SSCPList_toTwoDimensions (my groups.get(), my eigen -> eigenvectors.row (d1), my eigen -> eigenvectors.row (d2));
  332. SSCPList_drawConcentrationEllipses (thee.get(), g, scale, confidence, label, 1, 2, xmin, xmax, ymin, ymax, fontSize, 0);
  333. if (garnish) {
  334. char32 llabel [40];
  335. Graphics_drawInnerBox (g);
  336. Graphics_marksLeft (g, 2, true, true, false);
  337. Melder_sprint (llabel,40, U"function ", d2);
  338. Graphics_textLeft (g, true, llabel);
  339. Graphics_marksBottom (g, 2, true, true, false);
  340. Melder_sprint (llabel,40, U"function ", d1);
  341. Graphics_textBottom (g, true, llabel);
  342. }
  343. }
  344. autoDiscriminant TableOfReal_to_Discriminant (TableOfReal me) {
  345. try {
  346. autoDiscriminant thee = Thing_new (Discriminant);
  347. integer dimension = my numberOfColumns;
  348. Melder_require (NUMdefined (my data.get()),
  349. U"There should be no undefined elements in the table.");
  350. Melder_require (TableOfReal_hasRowLabels (me),
  351. U"All rows should be labeled.");
  352. autoTableOfReal mew = TableOfReal_sortOnlyByRowLabels (me);
  353. if (! TableOfReal_hasColumnLabels (mew.get()))
  354. TableOfReal_setSequentialColumnLabels (mew.get(), 0, 0, U"c", 1, 1);
  355. thy groups = TableOfReal_to_SSCPList_byLabel (mew.get());
  356. thy total = TableOfReal_to_SSCP (mew.get(), 0, 0, 0, 0);
  357. if ((thy numberOfGroups = thy groups -> size) < 2)
  358. Melder_throw (U"Number of groups should be greater than one.");
  359. TableOfReal_centreColumns_byRowLabel (mew.get());
  360. // Overall centroid and apriori probabilities and costs.
  361. autoVEC centroid = VECzero (dimension);
  362. autoMAT between = MATzero (thy numberOfGroups, dimension);
  363. thy aprioriProbabilities = VECraw (thy numberOfGroups);
  364. longdouble sum = 0.0;
  365. for (integer k = 1; k <= thy numberOfGroups; k ++) {
  366. SSCP m = thy groups->at [k];
  367. double scale = SSCP_getNumberOfObservations (m);
  368. sum += scale;
  369. for (integer j = 1; j <= dimension; j ++)
  370. centroid [j] += scale * m -> centroid [j];
  371. }
  372. for (integer j = 1; j <= dimension; j ++)
  373. centroid [j] /= sum;
  374. for (integer k = 1; k <= thy numberOfGroups; k ++) {
  375. SSCP m = thy groups->at [k];
  376. double scale = SSCP_getNumberOfObservations (m);
  377. thy aprioriProbabilities [k] = scale / my numberOfRows;
  378. for (integer j = 1; j <= dimension; j ++)
  379. between [k] [j] = sqrt (scale) * (m -> centroid [j] - centroid [j]);
  380. }
  381. // We need to solve B'B.x = lambda W'W.x, where B'B and W'W are the between and within covariance matrices.
  382. // We do not calculate these covariance matrices directly from the data but instead use the GSVD to solve for
  383. // the eigenvalues and eigenvectors of the equation.
  384. thy eigen = Thing_new (Eigen);
  385. Eigen_initFromSquareRootPair (thy eigen.get(), between.get(), mew -> data.get());
  386. /*
  387. Costs.
  388. */
  389. thy costs = MATraw (thy numberOfGroups, thy numberOfGroups);
  390. for (integer igroup = 1; igroup <= thy numberOfGroups; igroup ++) {
  391. for (integer jgroup = igroup + 1; jgroup <= thy numberOfGroups; jgroup ++)
  392. thy costs [igroup] [jgroup] = thy costs [jgroup] [igroup] = 1.0;
  393. /*
  394. As the costs have been raw-initialized, set the diagonal to zero.
  395. */
  396. thy costs [igroup] [igroup] = 0.0;
  397. }
  398. return thee;
  399. } catch (MelderError) {
  400. Melder_throw (me, U": Discriminant not created.");
  401. }
  402. }
  403. autoConfiguration Discriminant_TableOfReal_to_Configuration (Discriminant me, TableOfReal thee, integer numberOfDimensions) {
  404. try {
  405. if (numberOfDimensions == 0)
  406. numberOfDimensions = Discriminant_getNumberOfFunctions (me);
  407. autoConfiguration him = Configuration_create (thy numberOfRows, numberOfDimensions);
  408. Eigen_TableOfReal_into_TableOfReal_projectRows (my eigen.get(), thee, 1, him.get(), 1, numberOfDimensions);
  409. TableOfReal_copyLabels (thee, him.get(), 1, 0);
  410. TableOfReal_setSequentialColumnLabels (him.get(), 0, 0, U"Eigenvector ", 1, 1);
  411. return him;
  412. } catch (MelderError) {
  413. Melder_throw (U"Configuration not created.");
  414. }
  415. }
  416. autoTableOfReal Discriminant_TableOfReal_mahalanobis (Discriminant me, TableOfReal thee, integer group, bool poolCovarianceMatrices) {
  417. try {
  418. Melder_require (group > 0 && group <= my numberOfGroups,
  419. U"Group should be in the range [1, ", my numberOfGroups, U"].");
  420. autoSSCP pool = SSCPList_to_SSCP_pool (my groups.get());
  421. autoCovariance covg = SSCP_to_Covariance (pool.get(), my numberOfGroups);
  422. autoCovariance cov = SSCP_to_Covariance (my groups->at [group], 1);
  423. autoTableOfReal him;
  424. if (poolCovarianceMatrices) { // use group mean instead of overall mean!
  425. VECcopy_preallocated (covg -> centroid.get(), cov -> centroid.get());
  426. him = Covariance_TableOfReal_mahalanobis (covg.get(), thee, false);
  427. } else {
  428. him = Covariance_TableOfReal_mahalanobis (cov.get(), thee, false);
  429. }
  430. return him;
  431. } catch (MelderError) {
  432. Melder_throw (U"TableOfReal not created.");
  433. }
  434. }
  435. autoClassificationTable Discriminant_TableOfReal_to_ClassificationTable (Discriminant me, TableOfReal thee, bool poolCovarianceMatrices, bool useAprioriProbabilities) {
  436. try {
  437. integer numberOfGroups = Discriminant_getNumberOfGroups (me);
  438. integer dimension = Eigen_getDimensionOfComponents (my eigen.get());
  439. Melder_require (dimension == thy numberOfColumns, U"The number of columns should agree with the dimension of the discriminant.");
  440. autoVEC log_p (numberOfGroups, kTensorInitializationType::RAW);
  441. autoVEC log_apriori (numberOfGroups, kTensorInitializationType::RAW);
  442. autoVEC ln_determinant (numberOfGroups, kTensorInitializationType::RAW);
  443. autoVEC buf (dimension, kTensorInitializationType::RAW);
  444. autoNUMvector<SSCP> sscpvec (1, numberOfGroups);
  445. autoSSCP pool = SSCPList_to_SSCP_pool (my groups.get());
  446. autoClassificationTable him = ClassificationTable_create (thy numberOfRows, numberOfGroups);
  447. his rowLabels. copyElementsFrom (thy rowLabels.get());
  448. // Scale the sscp to become a covariance matrix.
  449. for (integer i = 1; i <= dimension; i ++) {
  450. for (integer k = i; k <= dimension; k ++)
  451. pool -> data [k] [i] = pool -> data [i] [k] /= pool -> numberOfObservations - numberOfGroups;
  452. }
  453. double lnd;
  454. autoSSCPList agroups; SSCPList groups; // ppgb FIXME dit kan niet goed izjn
  455. if (poolCovarianceMatrices) {
  456. /*
  457. Covariance matrix S can be decomposed as S = L.L'. Calculate L^-1.
  458. L^-1 will be used later in the Mahalanobis distance calculation:
  459. v'.S^-1.v == v'.L^-1'.L^-1.v == (L^-1.v)'.(L^-1.v).
  460. */
  461. MATlowerCholeskyInverse_inplace (pool -> data.get(), & lnd);
  462. for (integer j = 1; j <= numberOfGroups; j ++) {
  463. ln_determinant [j] = lnd;
  464. sscpvec [j] = pool.get();
  465. }
  466. groups = my groups.get();
  467. } else {
  468. // Calculate the inverses of all group covariance matrices.
  469. // In case of a singular matrix, substitute inverse of pooled.
  470. agroups = Data_copy (my groups.get());
  471. groups = agroups.get();
  472. integer npool = 0;
  473. for (integer j = 1; j <= numberOfGroups; j ++) {
  474. SSCP t = groups->at [j];
  475. integer no = Melder_ifloor (SSCP_getNumberOfObservations (t));
  476. for (integer i = 1; i <= dimension; i ++) {
  477. for (integer k = i; k <= dimension; k ++)
  478. t -> data [k] [i] = t -> data [i] [k] /= no - 1;
  479. }
  480. sscpvec [j] = groups->at [j];
  481. try {
  482. MATlowerCholeskyInverse_inplace (t -> data.get(), & ln_determinant [j]);
  483. } catch (MelderError) {
  484. // Try the alternative: the pooled covariance matrix.
  485. // Clear the error.
  486. Melder_clearError ();
  487. if (npool == 0)
  488. MATlowerCholeskyInverse_inplace (pool -> data.get(), & lnd);
  489. npool ++;
  490. sscpvec [j] = pool.get();
  491. ln_determinant [j] = lnd;
  492. }
  493. }
  494. if (npool > 0)
  495. Melder_warning (npool, U" groups use pooled covariance matrix.");
  496. }
  497. // Labels for columns in ClassificationTable
  498. for (integer j = 1; j <= numberOfGroups; j ++) {
  499. conststring32 name = Thing_getName (my groups->at [j]);
  500. if (! name)
  501. name = U"?";
  502. TableOfReal_setColumnLabel (him.get(), j, name);
  503. }
  504. // Normalize the sum of the apriori probabilities to 1.
  505. // Next take ln (p) because otherwise probabilities might be too small to represent.
  506. VEC_normalize_inplace (my aprioriProbabilities.get(), 1.0, 1.0);
  507. double logg = log (numberOfGroups);
  508. for (integer j = 1; j <= numberOfGroups; j ++) {
  509. log_apriori [j] = useAprioriProbabilities ? log (my aprioriProbabilities [j]) : - logg;
  510. }
  511. // Generalized squared distance function:
  512. // D^2(x) = (x - mu)' S^-1 (x - mu) + ln (determinant(S)) - 2 ln (apriori)
  513. for (integer i = 1; i <= thy numberOfRows; i ++) {
  514. double norm = 0.0, pt_max = -1e308;
  515. for (integer j = 1; j <= numberOfGroups; j ++) {
  516. SSCP t = groups->at [j];
  517. double md = NUMmahalanobisDistance (sscpvec [j] -> data.get(), thy data.row(i),t -> centroid.get());
  518. //double md = mahalanobisDistanceSq (sscpvec [j] -> data.at, dimension, thy data [i], t -> centroid, buf.at);
  519. double pt = log_apriori [j] - 0.5 * (ln_determinant [j] + md);
  520. if (pt > pt_max) {
  521. pt_max = pt;
  522. }
  523. log_p [j] = pt;
  524. }
  525. for (integer j = 1; j <= numberOfGroups; j ++) {
  526. norm += log_p [j] = exp (log_p [j] - pt_max);
  527. }
  528. for (integer j = 1; j <= numberOfGroups; j ++) {
  529. his data [i] [j] = log_p [j] / norm;
  530. }
  531. }
  532. return him;
  533. } catch (MelderError) {
  534. Melder_throw (U"ClassificationTable from Discriminant & TableOfReal not created.");
  535. }
  536. }
  537. autoClassificationTable Discriminant_TableOfReal_to_ClassificationTable_dw (Discriminant me, TableOfReal thee, bool poolCovarianceMatrices, bool useAprioriProbabilities, double alpha, double minProb, autoTableOfReal *displacements) {
  538. try {
  539. integer g = Discriminant_getNumberOfGroups (me);
  540. integer p = Eigen_getDimensionOfComponents (my eigen.get());
  541. integer m = thy numberOfRows;
  542. Melder_require (p == thy numberOfColumns, U"The number of columns does not agree with the dimension of the discriminant.");
  543. autoNUMvector<double> log_p (1, g);
  544. autoNUMvector<double> log_apriori (1, g);
  545. autoNUMvector<double> ln_determinant (1, g);
  546. autoNUMvector<double> buf (1, p);
  547. autoNUMvector<double> displacement (1, p);
  548. autoVEC x = VECzero (p);
  549. autoNUMvector<SSCP> sscpvec (1, g);
  550. autoSSCP pool = SSCPList_to_SSCP_pool (my groups.get());
  551. autoClassificationTable him = ClassificationTable_create (m, g);
  552. his rowLabels. copyElementsFrom (thy rowLabels.get());
  553. autoTableOfReal adisplacements = Data_copy (thee);
  554. // Scale the sscp to become a covariance matrix.
  555. for (integer i = 1; i <= p; i ++) {
  556. for (integer k = i; k <= p; k ++) {
  557. pool -> data [k] [i] = pool -> data [i] [k] /= pool -> numberOfObservations - g;
  558. }
  559. }
  560. double lnd;
  561. autoSSCPList agroups;
  562. SSCPList groups;
  563. if (poolCovarianceMatrices) {
  564. // Covariance matrix S can be decomposed as S = L.L'. Calculate L^-1.
  565. // L^-1 will be used later in the Mahalanobis distance calculation:
  566. // v'.S^-1.v == v'.L^-1'.L^-1.v == (L^-1.v)'.(L^-1.v).
  567. MATlowerCholeskyInverse_inplace (pool -> data.get(), & lnd);
  568. for (integer j = 1; j <= g; j ++) {
  569. ln_determinant [j] = lnd;
  570. sscpvec [j] = pool.get();
  571. }
  572. groups = my groups.get();
  573. } else {
  574. //Calculate the inverses of all group covariance matrices.
  575. // In case of a singular matrix, substitute inverse of pooled.
  576. agroups = Data_copy (my groups.get());
  577. groups = agroups.get();
  578. integer npool = 0;
  579. for (integer j = 1; j <= g; j ++) {
  580. SSCP t = groups->at [j];
  581. integer no = Melder_ifloor (SSCP_getNumberOfObservations (t));
  582. for (integer i = 1; i <= p; i ++) {
  583. for (integer k = i; k <= p; k ++) {
  584. t -> data [k] [i] = t -> data [i] [k] /= no - 1;
  585. }
  586. }
  587. sscpvec [j] = groups->at [j];
  588. try {
  589. MATlowerCholeskyInverse_inplace (t -> data.get(), & ln_determinant [j]);
  590. } catch (MelderError) {
  591. // Try the alternative: the pooled covariance matrix.
  592. // Clear the error.
  593. Melder_clearError ();
  594. if (npool == 0) {
  595. MATlowerCholeskyInverse_inplace (pool -> data.get(), & lnd);
  596. }
  597. npool ++;
  598. sscpvec [j] = pool.get();
  599. ln_determinant [j] = lnd;
  600. }
  601. }
  602. if (npool > 0) {
  603. Melder_warning (npool, U" groups use pooled covariance matrix.");
  604. }
  605. }
  606. // Labels for columns in ClassificationTable
  607. for (integer j = 1; j <= g; j ++) {
  608. conststring32 name = Thing_getName (my groups->at [j]);
  609. if (! name)
  610. name = U"?";
  611. TableOfReal_setColumnLabel (him.get(), j, name);
  612. }
  613. // Normalize the sum of the apriori probabilities to 1.
  614. // Next take ln (p) because otherwise probabilities might be too small to represent.
  615. double logg = log (g);
  616. VEC_normalize_inplace (my aprioriProbabilities.get(), 1.0, 1.0);
  617. for (integer j = 1; j <= g; j ++) {
  618. log_apriori [j] = useAprioriProbabilities ? log (my aprioriProbabilities [j]) : - logg;
  619. }
  620. // Generalized squared distance function:
  621. // D^2(x) = (x - mu)' S^-1 (x - mu) + ln (determinant(S)) - 2 ln (apriori)
  622. for (integer i = 1; i <= m; i ++) {
  623. SSCP winner;
  624. double norm = 0, pt_max = -1e308;
  625. integer iwinner = 1;
  626. for (integer k = 1; k <= p; k ++) {
  627. x [k] = thy data [i] [k] + displacement [k];
  628. }
  629. for (integer j = 1; j <= g; j ++) {
  630. SSCP t = groups->at [j];
  631. double md = NUMmahalanobisDistance (sscpvec [j] -> data.get(), x.get(), t -> centroid.get());
  632. // double md = mahalanobisDistanceSq (sscpvec [j] -> data.at, p, x.peek(), t -> centroid, buf.peek());
  633. double pt = log_apriori [j] - 0.5 * (ln_determinant [j] + md);
  634. if (pt > pt_max) {
  635. pt_max = pt;
  636. iwinner = j;
  637. }
  638. log_p [j] = pt;
  639. }
  640. for (integer j = 1; j <= g; j ++) {
  641. norm += log_p [j] = exp (log_p [j] - pt_max);
  642. }
  643. for (integer j = 1; j <= g; j ++) {
  644. his data [i] [j] = log_p [j] / norm;
  645. }
  646. // Save old displacement, calculate new displacement
  647. winner = groups->at [iwinner];
  648. for (integer k = 1; k <= p; k ++) {
  649. adisplacements -> data [i] [k] = displacement [k];
  650. if (his data [i] [iwinner] > minProb) {
  651. double delta_k = winner -> centroid [k] - x [k];
  652. displacement [k] += alpha * delta_k;
  653. }
  654. }
  655. }
  656. *displacements = adisplacements.move();
  657. return him;
  658. } catch (MelderError) {
  659. Melder_throw (U"ClassificationTable for Weenink procedure not created.");
  660. }
  661. }
  662. autoConfiguration TableOfReal_to_Configuration_lda (TableOfReal me, integer numberOfDimensions) {
  663. try {
  664. autoDiscriminant thee = TableOfReal_to_Discriminant (me);
  665. autoConfiguration him = Discriminant_TableOfReal_to_Configuration (thee.get(), me, numberOfDimensions);
  666. return him;
  667. } catch (MelderError) {
  668. Melder_throw (me, U": Configuration with lda data not created.");
  669. }
  670. }
  671. /* End of file Discriminant.cpp */