CCA_and_Correlation.cpp 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. /* CCA_and_Correlation.cpp
  2. *
  3. * Copyright (C) 1993-2018 David Weenink, 2017 Paul Boersma
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. djmw 2001
  20. djmw 20020525 GPL header.
  21. djmw 20060323 Stewart-Love redundancy added.
  22. djmw 20071022 Melder_error<n>
  23. */
  24. #include "CCA_and_Correlation.h"
  25. #include "NUM2.h"
  26. autoTableOfReal CCA_Correlation_factorLoadings (CCA me, Correlation thee) {
  27. try {
  28. integer ny = my y -> dimension, nx = my x -> dimension;
  29. Melder_require (ny + nx == thy numberOfColumns,
  30. U"The number of columns in the Correlation must equal the sum of the dimensions in the CCA object");
  31. autoTableOfReal him = TableOfReal_create (2 * my numberOfCoefficients, thy numberOfColumns);
  32. his columnLabels. copyElementsFrom (thy columnLabels.get());
  33. TableOfReal_setSequentialRowLabels (him.get(), 1, my numberOfCoefficients, U"dv", 1, 1);
  34. TableOfReal_setSequentialRowLabels (him.get(), my numberOfCoefficients + 1, 2 * my numberOfCoefficients, U"iv", 1, 1);
  35. double **evecy = my y -> eigenvectors.at, **evecx = my x -> eigenvectors.at;
  36. for (integer i = 1; i <= thy numberOfRows; i ++) {
  37. for (integer j = 1; j <= my numberOfCoefficients; j ++) {
  38. longdouble t = 0.0;
  39. for (integer k = 1; k <= ny; k ++) {
  40. t += thy data [i] [k] * evecy [j] [k];
  41. }
  42. his data [j] [i] = (double) t;
  43. }
  44. for (integer j = 1; j <= my numberOfCoefficients; j ++) {
  45. longdouble t = 0.0;
  46. for (integer k = 1; k <= nx; k ++) {
  47. t += thy data [i] [ny + k] * evecx [j] [k];
  48. }
  49. his data [my numberOfCoefficients + j] [i] = (double) t;
  50. }
  51. }
  52. return him;
  53. } catch (MelderError) {
  54. Melder_throw (U"TableOfReal not created from CCA & Correlation.");
  55. }
  56. }
  57. static void _CCA_Correlation_check (CCA me, Correlation thee, integer canonicalVariate_from, integer canonicalVariate_to) {
  58. Melder_require (my y -> dimension + my x -> dimension == thy numberOfColumns,
  59. U"The number of columns in the Correlation object should equal the sum of the dimensions in the CCA object");
  60. Melder_require (canonicalVariate_to >= canonicalVariate_from,
  61. U"The second value in the \"Canonical variate range\" should be equal or larger than the first.");
  62. Melder_require (canonicalVariate_from > 0 && canonicalVariate_to <= my numberOfCoefficients,
  63. U"The \"Canonical variate range\" should be within the interval [1, ", my numberOfCoefficients, U"].");
  64. }
  65. double CCA_Correlation_getVarianceFraction (CCA me, Correlation thee, int x_or_y, integer canonicalVariate_from, integer canonicalVariate_to) {
  66. _CCA_Correlation_check (me, thee, canonicalVariate_from, canonicalVariate_to);
  67. /* For the formulas see:
  68. William W. Cooley & Paul R. Lohnes (1971), Multivariate data Analysis, John Wiley & Sons, pag 170-...
  69. varianceFraction = s'.s / n,
  70. where e.g. for the independent set x:
  71. s = Rxx . c,
  72. and Rxx is the correlation matrix of x,
  73. c is the factor coefficient for x,
  74. nx is the dimension of x,
  75. The factor coefficient c is the eigenvector e for x scaled by the st.dev of the component,
  76. i.e. c = e / sqrt (e'.R.e) (pag 32-33).
  77. Therefore:
  78. varianceFraction = s'.s / n = c'Rxx' Rxx c/n = (e'.Rxx' Rxx.e) /(e'.Rxx.e) * 1/n
  79. (for one can. variate)
  80. */
  81. integer n = my x -> dimension;
  82. double **evec = my x -> eigenvectors.at;
  83. integer ioffset = my y -> dimension;
  84. if (x_or_y == 1) { /* y: dependent set */
  85. n = my y -> dimension;
  86. evec = my y -> eigenvectors.at;
  87. ioffset = 0;
  88. }
  89. longdouble varianceFraction = 0.0;
  90. for (integer icv = canonicalVariate_from; icv <= canonicalVariate_to; icv ++) {
  91. longdouble variance = 0.0, varianceScaling = 0.0;
  92. for (integer i = 1; i <= n; i ++) {
  93. longdouble si = 0.0;
  94. for (integer j = 1; j <= n; j++) {
  95. si += thy data [ioffset + i] [ioffset + j] * evec [icv] [j]; /* Rxx.e */
  96. }
  97. variance += si * si; /* (Rxx.e)'(Rxx.e) = e'.Rxx'.Rxx.e */
  98. varianceScaling += evec [icv] [i] * si; /* e'.Rxx.e*/
  99. }
  100. varianceFraction += (variance / varianceScaling) / n;
  101. }
  102. return (double) varianceFraction;
  103. }
  104. double CCA_Correlation_getRedundancy_sl (CCA me, Correlation thee, int x_or_y, integer canonicalVariate_from, integer canonicalVariate_to) {
  105. _CCA_Correlation_check (me, thee, canonicalVariate_from, canonicalVariate_to);
  106. longdouble redundancy = 0.0;
  107. for (integer icv = canonicalVariate_from; icv <= canonicalVariate_to; icv ++) {
  108. double varianceFraction = CCA_Correlation_getVarianceFraction (me, thee, x_or_y, icv, icv);
  109. if (isundef (varianceFraction)) {
  110. return undefined;
  111. }
  112. redundancy += varianceFraction * my y -> eigenvalues [icv];
  113. }
  114. return (double) redundancy;
  115. }
  116. /* End of file CCA_and_Correlation.cpp */