PCA.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. /* PCA.cpp
  2. *
  3. * Principal Component Analysis
  4. *
  5. * Copyright (C) 1993-2018 David Weenink
  6. *
  7. * This code is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * This code is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. /*
  21. djmw 20000511 PCA_TableOfReal_to_Configuration: added centralize option.
  22. (later removed)
  23. djmw 20020327 PCA_TableOfReal_to_Configuration modified internals.
  24. djmw 20020418 Removed some causes for compiler warnings.
  25. djmw 20020502 modified call Eigen_TableOfReal_project_into.
  26. djmw 20030324 Added PCA_TableOfReal_getFractionVariance.
  27. djmw 20061212 Changed info to Melder_writeLine<x> format.
  28. djmw 20071012 Added: oo_CAN_WRITE_AS_ENCODING.h
  29. djmw 20071201 Melder_warning<n>
  30. djmw 20081119 Check in TableOfReal_to_PCA if TableOfReal_areAllCellsDefined
  31. djmw 20110304 Thing_new
  32. */
  33. #include "Configuration.h"
  34. #include "Eigen_and_SSCP.h"
  35. #include "Eigen_and_TableOfReal.h"
  36. #include "Matrix_extensions.h"
  37. #include "NUM2.h"
  38. #include "PCA.h"
  39. #include "TableOfReal_extensions.h"
  40. #include "oo_DESTROY.h"
  41. #include "PCA_def.h"
  42. #include "oo_COPY.h"
  43. #include "PCA_def.h"
  44. #include "oo_EQUAL.h"
  45. #include "PCA_def.h"
  46. #include "oo_CAN_WRITE_AS_ENCODING.h"
  47. #include "PCA_def.h"
  48. #include "oo_WRITE_TEXT.h"
  49. #include "PCA_def.h"
  50. #include "oo_READ_TEXT.h"
  51. #include "PCA_def.h"
  52. #include "oo_WRITE_BINARY.h"
  53. #include "PCA_def.h"
  54. #include "oo_READ_BINARY.h"
  55. #include "PCA_def.h"
  56. #include "oo_DESCRIPTION.h"
  57. #include "PCA_def.h"
  58. Thing_implement (PCA, Eigen, 0);
  59. void structPCA :: v_info () {
  60. structDaata :: v_info ();
  61. MelderInfo_writeLine (U"Number of components: ", numberOfEigenvalues);
  62. MelderInfo_writeLine (U"Number of dimensions: ", dimension);
  63. MelderInfo_writeLine (U"Number of observations: ", numberOfObservations);
  64. }
  65. autoPCA PCA_create (integer numberOfComponents, integer dimension) {
  66. try {
  67. autoPCA me = Thing_new (PCA);
  68. Eigen_init (me.get(), numberOfComponents, dimension);
  69. my labels = autostring32vector (dimension);
  70. my centroid = VECzero (dimension);
  71. return me;
  72. } catch (MelderError) {
  73. Melder_throw (U"PCA not created");
  74. }
  75. }
  76. void PCA_setNumberOfObservations (PCA me, integer numberOfObservations) {
  77. my numberOfObservations = numberOfObservations;
  78. }
  79. integer PCA_getNumberOfObservations (PCA me) {
  80. return my numberOfObservations;
  81. }
  82. void PCA_getEqualityOfEigenvalues (PCA me, integer from, integer to, int conservative, double *p_prob, double *p_chisq, double *p_df) {
  83. double sum = 0.0, sumln = 0.0;
  84. double prob = undefined, df = undefined, chisq = undefined;
  85. if (from == 0 && to == 0) {
  86. to = 1;
  87. from = my numberOfEigenvalues;
  88. }
  89. if (from < to && from > 0 && to <= my numberOfEigenvalues) {
  90. integer i;
  91. for (i = from; i <= to; i ++) {
  92. if (my eigenvalues [i] <= 0) {
  93. break;
  94. }
  95. sum += my eigenvalues [i];
  96. sumln += log (my eigenvalues [i]);
  97. }
  98. if (sum == 0.0) {
  99. return;
  100. }
  101. integer r = i - from;
  102. double n = my numberOfObservations - 1;
  103. if (conservative) {
  104. n -= from + (double) (r * (2 * r + 1) + 2) / (6.0 * r);
  105. }
  106. df = r * (r + 1) / 2 - 1;
  107. chisq = n * (r * log (sum / r) - sumln);
  108. prob = NUMchiSquareQ (chisq, df);
  109. }
  110. if (p_prob) {
  111. *p_prob = prob;
  112. }
  113. if (p_chisq) {
  114. *p_chisq = chisq;
  115. }
  116. if (p_df) {
  117. *p_df = df;
  118. }
  119. }
  120. /* the low level routines
  121. *
  122. * The matrix M[numberOfRows, numberOfColumns] is interpreted as 'numberOfRows' vectors of dimension 'numberOfColumns'
  123. * The eigenstructure of the M'M will be calculated
  124. *
  125. */
  126. autoEigen PCA_to_Eigen (PCA me) {
  127. try {
  128. autoEigen thee = Eigen_create (my numberOfEigenvalues, my dimension);
  129. NUMmatrix_copyElements <double> (my eigenvectors.at, thy eigenvectors.at, 1, my numberOfEigenvalues, 1, my dimension);
  130. NUMvector_copyElements <double> (my eigenvalues.at, thy eigenvalues.at, 1, my numberOfEigenvalues);
  131. return thee;
  132. } catch (MelderError) {
  133. Melder_throw (me, U": no Eigen created.");
  134. }
  135. }
  136. static autoPCA NUMdmatrix_to_PCA (constMAT m, bool byColumns) {
  137. try {
  138. Melder_require (NUMdefined (m),
  139. U"All matrix elements should be defined.");
  140. Melder_require (NUMfrobeniusnorm (m) > 0.0,
  141. U"Not all values in your table should be zero.");
  142. autoMAT mcopy;
  143. if (byColumns) {
  144. if (m.ncol < m.nrow)
  145. Melder_warning (U"The number of columns in your table is less than the number of rows.");
  146. mcopy = MATtranspose (m);
  147. } else {
  148. if (m.nrow < m.ncol)
  149. Melder_warning (U"The number of rows in your table is less than the number of columns.");
  150. mcopy = MATcopy (m);
  151. }
  152. autoPCA thee = Thing_new (PCA);
  153. thy centroid = VECzero (mcopy.ncol);
  154. VECcolumnMeans_preallocated (thy centroid.get(), mcopy.get());
  155. MATsubtract_inplace (mcopy.get(), thy centroid.get());
  156. Eigen_initFromSquareRoot (thee.get(), mcopy.get());
  157. thy labels = autostring32vector (mcopy.ncol);
  158. PCA_setNumberOfObservations (thee.get(), mcopy.nrow);
  159. /*
  160. The covariance matrix C = A'A / (N-1). However, we have calculated
  161. the eigenstructure for A'A. This has no consequences for the
  162. eigenvectors, but the eigenvalues have to be divided by (N-1).
  163. */
  164. for (integer i = 1; i <= thy numberOfEigenvalues; i ++)
  165. thy eigenvalues [i] /= (mcopy.nrow - 1);
  166. return thee;
  167. } catch (MelderError) {
  168. Melder_throw (U"No PCA created from ", ( byColumns ? U"columns." : U"rows." ));
  169. }
  170. }
  171. autoPCA TableOfReal_to_PCA_byRows (TableOfReal me) {
  172. try {
  173. autoPCA thee = NUMdmatrix_to_PCA (my data.get(), false);
  174. Melder_assert (thy labels.size == my numberOfColumns);
  175. thy labels. copyElementsFrom (my columnLabels.get());
  176. return thee;
  177. } catch (MelderError) {
  178. Melder_throw (me, U": PCA not created.");
  179. }
  180. }
  181. autoPCA Matrix_to_PCA_byColumns (Matrix me) {
  182. try {
  183. autoPCA thee = NUMdmatrix_to_PCA (my z.get(), true);
  184. return thee;
  185. } catch (MelderError) {
  186. Melder_throw (me, U": no PCA created from columns.");
  187. }
  188. }
  189. autoPCA Matrix_to_PCA_byRows (Matrix me) {
  190. try {
  191. autoPCA thee = NUMdmatrix_to_PCA (my z.get(), false);
  192. return thee;
  193. } catch (MelderError) {
  194. Melder_throw (me, U": no PCA created from rows.");
  195. }
  196. }
  197. autoTableOfReal PCA_TableOfReal_to_TableOfReal_zscores (PCA me, TableOfReal thee, integer numberOfDimensions) {
  198. try {
  199. if (numberOfDimensions == 0 || numberOfDimensions > my numberOfEigenvalues) {
  200. numberOfDimensions = my numberOfEigenvalues;
  201. }
  202. autoTableOfReal him = TableOfReal_create (thy numberOfRows, numberOfDimensions);
  203. for (integer i = 1; i <= thy numberOfRows; i ++) { /* row */
  204. for (integer j = 1; j <= numberOfDimensions; j ++) {
  205. longdouble r = 0.0, sigma = sqrt (my eigenvalues [j]);
  206. for (integer k = 1; k <= my dimension; k ++)
  207. // eigenvector in row, data in row
  208. r += my eigenvectors [j] [k] * (thy data [i] [k] - my centroid [k]) / sigma;
  209. his data [i] [j] = (double) r;
  210. }
  211. }
  212. his rowLabels. copyElementsFrom (thy rowLabels.get());
  213. TableOfReal_setSequentialColumnLabels (him.get(), 0, 0, U"pc", 1, 1);
  214. return him;
  215. } catch (MelderError) {
  216. Melder_throw (U"TableOfReal (zscores) not created from PCA & TableOfReal.");
  217. }
  218. }
  219. autoTableOfReal PCA_TableOfReal_to_TableOfReal_projectRows (PCA me, TableOfReal thee, integer numberOfDimensionsToKeep) {
  220. try {
  221. if (numberOfDimensionsToKeep == 0 || numberOfDimensionsToKeep > my numberOfEigenvalues) {
  222. numberOfDimensionsToKeep = my numberOfEigenvalues;
  223. }
  224. autoTableOfReal him = TableOfReal_create (thy numberOfRows, numberOfDimensionsToKeep);
  225. Eigen_TableOfReal_into_TableOfReal_projectRows (me, thee, 1, him.get(), 1, numberOfDimensionsToKeep);
  226. his rowLabels. copyElementsFrom (thy rowLabels.get());
  227. TableOfReal_setSequentialColumnLabels (him.get(), 0, 0, U"pc", 1, 1);
  228. return him;
  229. } catch (MelderError) {
  230. Melder_throw (U"TableOfReal not created from PCA & TableOfReal.");
  231. }
  232. }
  233. autoConfiguration PCA_TableOfReal_to_Configuration (PCA me, TableOfReal thee, integer numberOfDimensionsToKeep) {
  234. try {
  235. if (numberOfDimensionsToKeep == 0 || numberOfDimensionsToKeep > my numberOfEigenvalues) {
  236. numberOfDimensionsToKeep = my numberOfEigenvalues;
  237. }
  238. autoConfiguration him = Configuration_create (thy numberOfRows, numberOfDimensionsToKeep);
  239. Eigen_TableOfReal_into_TableOfReal_projectRows (me, thee, 1, him.get(), 1, numberOfDimensionsToKeep);
  240. his rowLabels. copyElementsFrom (thy rowLabels.get());
  241. TableOfReal_setSequentialColumnLabels (him.get(), 0, 0, U"pc", 1, 1);
  242. return him;
  243. } catch (MelderError) {
  244. Melder_throw (U"Configuration not created from PCA & TableOfReal.");
  245. }
  246. }
  247. autoTableOfReal PCA_Configuration_to_TableOfReal_reconstruct (PCA me, Configuration thee) {
  248. try {
  249. integer npc = thy numberOfColumns;
  250. Melder_require (thy numberOfColumns <= my dimension,
  251. U"The dimension of the Configuration should be less than or equal to the dimension of the PCA.");
  252. if (npc > my numberOfEigenvalues)
  253. npc = my numberOfEigenvalues;
  254. autoTableOfReal him = TableOfReal_create (thy numberOfRows, my dimension);
  255. Melder_assert (my labels.size == my dimension);
  256. his columnLabels. copyElementsFrom (my labels.get());
  257. his rowLabels. copyElementsFrom (thy rowLabels.get());
  258. for (integer i = 1; i <= thy numberOfRows; i ++) {
  259. double *hisdata = his data [i];
  260. for (integer k = 1; k <= npc; k ++) {
  261. double *evec = my eigenvectors [k], pc = thy data [i] [k];
  262. for (integer j = 1; j <= my dimension; j ++)
  263. hisdata [j] += pc * evec [j];
  264. }
  265. }
  266. return him;
  267. } catch (MelderError) {
  268. Melder_throw (U"TableOfReal not reconstructed.");
  269. }
  270. }
  271. double PCA_TableOfReal_getFractionVariance (PCA me, TableOfReal thee, integer from, integer to) {
  272. try {
  273. double fraction = undefined;
  274. if (from < 1 || from > to || to > thy numberOfColumns)
  275. return undefined;
  276. autoSSCP s = TableOfReal_to_SSCP (thee, 0, 0, 0, 0);
  277. autoSSCP sp = Eigen_SSCP_project (me, s.get());
  278. fraction = SSCP_getFractionVariation (sp.get(), from, to);
  279. return fraction;
  280. } catch (MelderError) {
  281. return undefined;
  282. }
  283. }
  284. autoTableOfReal PCA_to_TableOfReal_reconstruct1 (PCA me, conststring32 numstring) {
  285. try {
  286. integer npc;
  287. autoVEC pc = VEC_createFromString (numstring);
  288. autoConfiguration c = Configuration_create (1, pc.size);
  289. for (integer j = 1; j <= npc; j ++)
  290. c -> data [1] [j] = pc [j];
  291. autoTableOfReal him = PCA_Configuration_to_TableOfReal_reconstruct (me, c.get());
  292. return him;
  293. } catch (MelderError) {
  294. Melder_throw (me, U" not reconstructed.");
  295. }
  296. }
  297. /* End of file PCA.cpp */