SSCP.cpp 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934
  1. /* SSCP.cpp
  2. *
  3. * Copyright (C) 1993-2018 David Weenink
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. djmw 20010614 Covariance_difference: corrected bug in calculation of
  20. trace (A B^-1) that caused chisq values to be completely unreliable.
  21. djmw 20010628 TableOfReal_to_SSCP: skip error-return when nrows < 2,
  22. just issue warning.
  23. djmw 20010906 TableOfReal_to_SSCP.
  24. djmw 20020212 +getEllipse(s)BoundingBoxCoordinates.
  25. djmw 20020313 corrected SSCP_Eigen_project.
  26. djmw 20020327 Moved SSCP_Eigen_project to Eigen_SSCP.c.
  27. djmw 20020418 Removed some causes for compiler warnings.
  28. djmw 20020430 Changed explicit calculation of SSCP to svd in
  29. TableOfReal_to_SSCP.
  30. djmw 20030703 Replaced NUMincompletebeta with gsl_sf_beta_inc.
  31. djmw 20030801 SSCPs_drawConcentrationEllipses extra label argument.
  32. djmw 20030825 Replaced gsl_sf_beta_inc with NUMincompletebeta.
  33. djmw 20031104 Added SSCP_to_CCA.
  34. djmw 20031117 Added SSCP_extractCentroid.
  35. djmw 20031127 Added Covariance_TableOfReal_extractDistanceQuantileRange.
  36. djmw 20040211 Better warnings in TableOfReal_to_SSCPs_byLabel for single cases.
  37. djmw 20040214 Fixed some compiler warnings.
  38. djmw 20040219 SSCP_getTraceFraction added.
  39. djmw 20040617 SSCP(s)_drawConcentrationEllipse(s) draw with reverse axes possible.
  40. (not yet supported by commands in Picture window like 'One mark bottom...' because of reversed axes)!
  41. djmw 20060202 Removed a bug in TableOfReal_to_SSCP that could crash Praat (if nrows < ncols).
  42. djmw 20060503 Covariance_getSignificanceOfMeansDifference: set probability = 0 if
  43. var_pooled = 0 and paired.
  44. djmw 20060811 Removed bug in SSCP_TableOfReal_to_MahalanobisDistances that caused column labels always to be copied.
  45. djmw 20061214 Corrected possible integer overflow in ellipseScalefactor.
  46. djmw 20071012 Added: o_CAN_WRITE_AS_ENCODING.h
  47. djmw 20071016 To Melder_error<n>
  48. djmw 20071202 Melder_warning<n>
  49. djmw 20080122 float -> double
  50. djmw 20081119 TableOfReal_to_SSCP check if numbers are defined
  51. djmw 20090617 TableOfReal_to_SSCPs_byLabel better warnings for singular cases.
  52. djmw 20090629 +Covariances_getMultivariateCentroidDifference, Covariances_equality.
  53. djmw 20100106 +Covariance_TableOfReal_mahalanobis.
  54. djmw 20101019 Reduced storage Covariance.
  55. djmw 20110304 Thing_new
  56. */
  57. #include "SSCP.h"
  58. #include "Eigen.h"
  59. #include "NUMclapack.h"
  60. #include "NUM2.h"
  61. #include "SVD.h"
  62. #include "oo_DESTROY.h"
  63. #include "SSCP_def.h"
  64. #include "oo_COPY.h"
  65. #include "SSCP_def.h"
  66. #include "oo_EQUAL.h"
  67. #include "SSCP_def.h"
  68. #include "oo_CAN_WRITE_AS_ENCODING.h"
  69. #include "SSCP_def.h"
  70. #include "oo_WRITE_TEXT.h"
  71. #include "SSCP_def.h"
  72. #include "oo_READ_TEXT.h"
  73. #include "SSCP_def.h"
  74. #include "oo_WRITE_BINARY.h"
  75. #include "SSCP_def.h"
  76. #include "oo_READ_BINARY.h"
  77. #include "SSCP_def.h"
  78. #include "oo_DESCRIPTION.h"
  79. #include "SSCP_def.h"
  80. #define TOVEC(x) (&(x) - 1)
  81. Thing_implement (SSCP, TableOfReal, 0);
  82. Thing_implement (SSCPList, TableOfRealList, 0);
  83. Thing_implement (Covariance, SSCP, 0);
  84. Thing_implement (CovarianceList, SSCPList, 0);
  85. Thing_implement (Correlation, SSCP, 0);
  86. void structSSCP :: v_info () {
  87. structTableOfReal :: v_info ();
  88. double zmin, zmax;
  89. NUMmatrix_extrema<double> (data.at, 1, numberOfRows, 1, numberOfColumns, &zmin, &zmax);
  90. MelderInfo_writeLine (U"Minimum value: ", zmin);
  91. MelderInfo_writeLine (U"Maximum value: ", zmax);
  92. }
  93. /*
  94. Calculate scale factor by which sqrt(eigenvalue) has to
  95. be multiplied to obtain the length of an ellipse axis.
  96. */
  97. double SSCP_getEllipseScalefactor (SSCP me, double scale, bool confidence) {
  98. integer n = Melder_ifloor (SSCP_getNumberOfObservations (me));
  99. if (confidence) {
  100. integer p = my numberOfColumns;
  101. double f;
  102. if (n - p < 1) {
  103. return -1.0;
  104. }
  105. /* D.E. Johnson (1998), Applied Multivariate methods, page 410 */
  106. f = NUMinvFisherQ (1.0 - scale, p, n - p);
  107. scale = 2.0 * sqrt (f * p * (n - 1) / ( ((double) n) * (n - p)));
  108. } else {
  109. // very ugly, temporary hack
  110. scale *= 2.0 / (scale < 0.0 ? -1.0 : sqrt (n - 1));
  111. }
  112. return scale;
  113. }
  114. static void getEllipseBoundingBoxCoordinates (SSCP me, double scale, bool confidence, double *xmin, double *xmax, double *ymin, double *ymax) {
  115. double a, b, cs, width, height;
  116. double lscale = SSCP_getEllipseScalefactor (me, scale, confidence);
  117. NUMeigencmp22 (my data [1] [1], my data [1] [2], my data [2] [2], & a, & b, & cs, nullptr);
  118. NUMgetEllipseBoundingBox (sqrt (a), sqrt (b), cs, & width, & height);
  119. *xmin = my centroid [1] - lscale * width / 2.0;
  120. *xmax = *xmin + lscale * width;
  121. *ymin = my centroid [2] - lscale * height / 2.0;
  122. *ymax = *ymin + lscale * height;
  123. }
  124. void SSCPList_getEllipsesBoundingBoxCoordinates (SSCPList me, double scale, bool confidence, double *p_xmin, double *p_xmax, double *p_ymin, double *p_ymax) {
  125. double xmin = 1e308, xmax = -xmin, ymin = xmin, ymax = -ymin;
  126. for (integer i = 1; i <= my size; i ++) {
  127. SSCP s = my at [i];
  128. double xmn, xmx, ymn, ymx;
  129. getEllipseBoundingBoxCoordinates (s, scale, confidence, &xmn, &xmx, &ymn, &ymx);
  130. if (xmn < xmin) {
  131. xmin = xmn;
  132. }
  133. if (xmx > xmax) {
  134. xmax = xmx;
  135. }
  136. if (ymn < ymin) {
  137. ymin = ymn;
  138. }
  139. if (ymx > ymax) {
  140. ymax = ymx;
  141. }
  142. }
  143. if (p_xmin) {
  144. *p_xmin = xmin;
  145. }
  146. if (p_xmax) {
  147. *p_xmax = xmax;
  148. }
  149. if (p_ymin) {
  150. *p_ymin = ymin;
  151. }
  152. if (p_ymax) {
  153. *p_ymax = ymax;
  154. }
  155. }
  156. static autoSSCP _SSCP_extractTwoDimensions (SSCP me, integer d1, integer d2) {
  157. autoSSCP thee = SSCP_create (2);
  158. if (my numberOfRows == 1) { // diagonal
  159. thy data [1] [1] = my data [1] [d1];
  160. thy data [2] [2] = my data [1] [d2];
  161. } else {
  162. thy data [1] [1] = my data [d1] [d1];
  163. thy data [2] [2] = my data [d2] [d2];
  164. thy data [2] [1] = thy data [1] [2] = my data [d1] [d2];
  165. }
  166. thy centroid [1] = my centroid [d1];
  167. thy centroid [2] = my centroid [d2];
  168. thy numberOfObservations = my numberOfObservations;
  169. TableOfReal_setColumnLabel (thee.get(), 1, my columnLabels [d1].get());
  170. TableOfReal_setColumnLabel (thee.get(), 2, my columnLabels [d2].get());
  171. TableOfReal_setRowLabel (thee.get(), 1, my columnLabels [d1].get());
  172. TableOfReal_setRowLabel (thee.get(), 2, my columnLabels [d2].get());
  173. return thee;
  174. }
  175. autoSSCPList SSCPList_extractTwoDimensions (SSCPList me, integer d1, integer d2) {
  176. try {
  177. autoSSCPList thee = SSCPList_create ();
  178. for (integer i = 1; i <= my size; i ++) {
  179. autoSSCP t = _SSCP_extractTwoDimensions (my at [i], d1, d2);
  180. Thing_setName (t.get(), Thing_getName (my at [i]));
  181. thy addItem_move (t.move());
  182. }
  183. return thee;
  184. } catch (MelderError) {
  185. Melder_throw (me, U": cannot extract two dimensions.");
  186. }
  187. }
  188. void SSCP_drawTwoDimensionalEllipse_inside (SSCP me, Graphics g, double scale, conststring32 label, int fontSize) {
  189. try {
  190. integer nsteps = 100;
  191. autoNUMvector <double> x ((integer) 0, nsteps);
  192. autoNUMvector <double> y ((integer) 0, nsteps);
  193. // Get principal axes and orientation for the ellipse by performing the
  194. // eigen decomposition of a symmetric 2-by-2 matrix.
  195. // Principal axes are a and b with eigenvector/orientation (cs, sn).
  196. double a, b, cs, sn;
  197. NUMeigencmp22 (my data [1] [1], my data [1] [2], my data [2] [2], & a, & b, & cs, & sn);
  198. // 1. Take sqrt to get units of 'std_dev'
  199. a = scale * sqrt (a) / 2.0;
  200. b = scale * sqrt (b) / 2.0;
  201. x [nsteps] = x [0] = my centroid [1] + cs * a;
  202. y [nsteps] = y [0] = my centroid [2] + sn * a;
  203. double angle = 0.0;
  204. double angle_inc = NUM2pi / nsteps;
  205. for (integer i = 1; i < nsteps; i ++, angle += angle_inc) {
  206. double xc = a * cos (angle);
  207. double yc = b * sin (angle);
  208. double xt = xc * cs - yc * sn;
  209. y [i] = my centroid [2] + xc * sn + yc * cs;
  210. x [i] = my centroid [1] + xt;
  211. }
  212. Graphics_polyline (g, nsteps + 1, x.peek(), y.peek());
  213. if (label) {
  214. int oldFontSize = Graphics_inqFontSize (g);
  215. Graphics_setFontSize (g, fontSize);
  216. Graphics_setTextAlignment (g, Graphics_CENTRE, Graphics_HALF);
  217. Graphics_text (g, my centroid [1], my centroid [2], label);
  218. Graphics_setFontSize (g, oldFontSize);
  219. }
  220. } catch (MelderError) {
  221. Melder_clearError ();
  222. }
  223. }
  224. static void _SSCP_drawTwoDimensionalEllipse (SSCP me, Graphics g, double scale, int fontSize) {
  225. integer nsteps = 100;
  226. conststring32 name;
  227. autoNUMvector <double> x ((integer) 0, nsteps);
  228. autoNUMvector <double> y ((integer) 0, nsteps);
  229. // Get principal axes and orientation for the ellipse by performing the
  230. // eigen decomposition of a symmetric 2-by-2 matrix.
  231. // Principal axes are a and b with eigenvector/orientation (cs, sn).
  232. double a, b, cs, sn;
  233. NUMeigencmp22 (my data [1] [1], my data [1] [2], my data [2] [2], & a, & b, & cs, & sn);
  234. // 1. Take sqrt to get units of 'std_dev'
  235. a = scale * sqrt (a) / 2.0;
  236. b = scale * sqrt (b) / 2.0;
  237. x [nsteps] = x [0] = my centroid [1] + cs * a;
  238. y [nsteps] = y [0] = my centroid [2] + sn * a;
  239. double angle = 0.0;
  240. double angle_inc = NUM2pi / nsteps;
  241. for (integer i = 1; i < nsteps; i ++, angle += angle_inc) {
  242. double xc = a * cos (angle);
  243. double yc = b * sin (angle);
  244. double xt = xc * cs - yc * sn;
  245. y [i] = my centroid [2] + xc * sn + yc * cs;
  246. x [i] = my centroid [1] + xt;
  247. }
  248. Graphics_polyline (g, nsteps + 1, x.peek(), y.peek());
  249. if (fontSize > 0 && (name = Thing_getName (me)) != nullptr) {
  250. int oldFontSize = Graphics_inqFontSize (g);
  251. Graphics_setFontSize (g, fontSize);
  252. Graphics_setTextAlignment (g, Graphics_CENTRE, Graphics_HALF);
  253. Graphics_text (g, my centroid [1], my centroid [2], name);
  254. Graphics_setFontSize (g, oldFontSize);
  255. }
  256. }
  257. autoSSCP SSCP_toTwoDimensions (SSCP me, constVEC v1, constVEC v2) {
  258. try {
  259. Melder_assert (v1.size == v2.size && v1.size == my numberOfColumns);
  260. const double *vec [3];
  261. autoSSCP thee = SSCP_create (2);
  262. // Projection P of S on v1 and v2 (given matrix V' with 2 rows) is P = V'SV
  263. // P [i] [j] = sum(k) sum(m) V' [i] [k]*S [k] [m]*V [m] [j] = V' [i] [k]*S [k] [m]*V' [j] [m]
  264. // For the new centroids cnew [i] = sum(m) V' [i] [m]*c [m]
  265. vec [1] = v1.at; vec [2] = v2.at;
  266. if (my numberOfRows == 1) { // 1xn diagonal matrix
  267. for (integer k = 1; k <= my numberOfColumns; k ++) {
  268. thy data [1] [1] += v1 [k] * my data [1] [k] * v1 [k];
  269. }
  270. for (integer k = 1; k <= my numberOfColumns; k ++) {
  271. thy data [1] [2] += v1 [k] * my data [1] [k] * v2 [k];
  272. }
  273. for (integer k = 1; k <= my numberOfColumns; k ++) {
  274. thy data [2] [2] += v2 [k] * my data [1] [k] * v2 [k];
  275. }
  276. thy data [2] [1] = thy data [1] [2];
  277. } else {
  278. for (integer i = 1; i <= 2; i ++) {
  279. for (integer j = i; j <= 2; j ++) {
  280. double sum = 0;
  281. for (integer k = 1; k <= my numberOfRows; k ++) {
  282. for (integer m = 1; m <= my numberOfRows; m ++) {
  283. sum += vec [i] [k] * my data [k] [m] * vec [j] [m];
  284. }
  285. }
  286. thy data [j] [i] = thy data [i] [j] = sum;
  287. }
  288. }
  289. }
  290. // centroids
  291. for (integer m = 1; m <= my numberOfColumns; m ++) {
  292. thy centroid [1] += v1 [m] * my centroid [m];
  293. }
  294. for (integer m = 1; m <= my numberOfColumns; m ++) {
  295. thy centroid [2] += v2 [m] * my centroid [m];
  296. }
  297. thy numberOfObservations = SSCP_getNumberOfObservations (me);
  298. return thee;
  299. } catch (MelderError) {
  300. Melder_throw (me, U": cannot extract two dimensions.");
  301. }
  302. }
  303. void SSCP_init (SSCP me, integer dimension, integer storage) {
  304. TableOfReal_init (me, storage, dimension);
  305. my centroid = VECzero (dimension);
  306. }
  307. autoSSCP SSCP_create (integer dimension) {
  308. try {
  309. autoSSCP me = Thing_new (SSCP);
  310. SSCP_init (me.get(), dimension, dimension);
  311. return me;
  312. } catch (MelderError) {
  313. Melder_throw (U"SSCP not created.");
  314. }
  315. }
  316. double SSCP_getConcentrationEllipseArea (SSCP me, double scale, bool confidence, integer d1, integer d2) {
  317. Melder_require (d1 > 0 && d1 <= my numberOfRows && d2 > 0 && d2 <= my numberOfRows && d1 != d2,
  318. U"Incorrect axes.");
  319. autoSSCP thee = _SSCP_extractTwoDimensions (me, d1, d2);
  320. scale = SSCP_getEllipseScalefactor (thee.get(), scale, confidence);
  321. Melder_require (scale > 0, U"The scale factor should be larger than zero.");
  322. double a, b;
  323. NUMeigencmp22 (thy data [1] [1], thy data [1] [2], thy data [2] [2], & a, & b, nullptr, nullptr);
  324. // 1. Take sqrt to get units of 'std_dev'
  325. a = scale * sqrt (a) / 2.0;
  326. b = scale * sqrt (b) / 2.0;
  327. return NUMpi * a * b;
  328. }
  329. double SSCP_getFractionVariation (SSCP me, integer from, integer to) {
  330. integer n = my numberOfRows;
  331. if (from < 1 || from > to || to > n) {
  332. return undefined;
  333. }
  334. double sum = 0.0, trace = 0.0;
  335. for (integer i = 1; i <= n; i ++) {
  336. trace += my numberOfRows == 1 ? my data [1] [i] : my data [i] [i];
  337. if (i >= from && i <= to) {
  338. sum += my numberOfRows == 1 ? my data [1] [i] : my data [i] [i];
  339. }
  340. }
  341. return trace > 0.0 ? sum / trace : undefined;
  342. }
  343. void SSCP_drawConcentrationEllipse (SSCP me, Graphics g, double scale, int confidence, integer d1, integer d2, double xmin, double xmax, double ymin, double ymax, int garnish) {
  344. Melder_require (d1 > 0 && d1 <= my numberOfRows && d2 > 0 && d2 <= my numberOfRows && d1 != d2, U"Incorrect axes.");
  345. autoSSCP thee = _SSCP_extractTwoDimensions (me, d1, d2);
  346. double xmn, xmx, ymn, ymx;
  347. getEllipseBoundingBoxCoordinates (thee.get(), scale, confidence, &xmn, &xmx, &ymn, &ymx);
  348. if (xmax == xmin) {
  349. xmin = xmn; xmax = xmx;
  350. }
  351. if (ymax == ymin) {
  352. ymin = ymn; ymax = ymx;
  353. }
  354. Graphics_setWindow (g, xmin, xmax, ymin, ymax);
  355. Graphics_setInner (g);
  356. scale = SSCP_getEllipseScalefactor (thee.get(), scale, confidence);
  357. Melder_require (scale > 0, U"The scale factor should be larger than zero.");
  358. _SSCP_drawTwoDimensionalEllipse (thee.get(), g, scale, 0);
  359. Graphics_unsetInner (g);
  360. if (garnish) {
  361. Graphics_drawInnerBox (g);
  362. Graphics_marksLeft (g, 2, true, true, false);
  363. Graphics_marksBottom (g, 2, true, true, false);
  364. }
  365. }
  366. void SSCP_setNumberOfObservations (SSCP me, double numberOfObservations) {
  367. my numberOfObservations = numberOfObservations;
  368. }
  369. double SSCP_getNumberOfObservations (SSCP me) {
  370. return my numberOfObservations;
  371. }
  372. double SSCP_getDegreesOfFreedom (SSCP me) {
  373. return my numberOfObservations - 1;
  374. }
  375. double SSCP_getTotalVariance (SSCP me) {
  376. double trace = 0.0;
  377. for (integer i = 1; i <= my numberOfColumns; i ++) {
  378. trace += my numberOfRows == 1 ? my data [1] [i] : my data [i] [i];
  379. }
  380. return trace;
  381. }
  382. double SSCP_getCumulativeContributionOfComponents (SSCP me, integer from, integer to) {
  383. double sum = undefined;
  384. if (to == 0) {
  385. to = my numberOfRows;
  386. }
  387. if (from > 0 && to <= my numberOfRows && from <= to) {
  388. sum = SSCP_getTotalVariance (me);
  389. double partial = 0.0;
  390. for (integer i = from; i <= to; i ++) {
  391. partial += my numberOfRows == 1 ? my data [1] [i] : my data [i] [i];
  392. }
  393. if (sum > 0.0) {
  394. sum = partial / sum;
  395. }
  396. }
  397. return sum;
  398. }
  399. /* For nxn matrix only ! */
  400. void Covariance_PCA_generateOneVector_inline (Covariance me, PCA thee, VEC vec, VEC buf) {
  401. // Generate the multi-normal vector elements N(0,sigma)
  402. Melder_require (thy dimension == my numberOfRows,
  403. U"The PCA must have the same dimension as the Covariance.");
  404. Melder_require (vec.size == buf.size && my numberOfColumns == buf.size,
  405. U"The vectors and the PCA must have the same dimension.");
  406. for (integer j = 1; j <= my numberOfColumns; j ++) {
  407. buf [j] = NUMrandomGauss (0.0, sqrt (thy eigenvalues [j]));
  408. }
  409. // Rotate back
  410. for (integer j = 1; j <= my numberOfColumns; j ++) {
  411. vec [j] = 0.0;
  412. for (integer k = 1; k <= my numberOfColumns; k ++) {
  413. vec [j] += buf [k] * thy eigenvectors [k] [j];
  414. }
  415. }
  416. // Add the centroid
  417. for (integer j = 1; j <= my numberOfColumns; j ++) {
  418. vec [j] += my centroid [j];
  419. }
  420. }
  421. autoTableOfReal Covariance_to_TableOfReal_randomSampling (Covariance me, integer numberOfData) {
  422. try {
  423. if (numberOfData <= 0)
  424. numberOfData = Melder_ifloor (my numberOfObservations);
  425. autoPCA pca = SSCP_to_PCA (me);
  426. autoTableOfReal thee = TableOfReal_create (numberOfData, my numberOfColumns);
  427. autoVEC buf (my numberOfColumns, kTensorInitializationType::RAW);
  428. for (integer i = 1; i <= numberOfData; i ++)
  429. Covariance_PCA_generateOneVector_inline (me, pca.get(), thy data.row (i), buf.get());
  430. thy columnLabels. copyElementsFrom (my columnLabels.get());
  431. return thee;
  432. } catch (MelderError) {
  433. Melder_throw (me, U": not random sampled.");
  434. }
  435. }
  436. autoSSCP TableOfReal_to_SSCP (TableOfReal me, integer rowb, integer rowe, integer colb, integer cole) {
  437. try {
  438. Melder_require (NUMdefined (my data.get()),
  439. U"All the table's elements should be defined.");
  440. fixAndCheckRowRange (& rowb, & rowe, my data.get(), 1);
  441. fixAndCheckColumnRange (& colb, & cole, my data.get(), 1);
  442. autoMAT part = MATpart (my data.get(), rowb, rowe, colb, cole);
  443. if (part.nrow < part.ncol)
  444. Melder_warning (U"The selected number of data points (", part.nrow,
  445. U") is less than the selected number of variables (", part.ncol,
  446. U").\nThe SSCP will not have full dimensionality. This may be a problem in later analysis steps."
  447. );
  448. autoSSCP thee = SSCP_create (part.ncol);
  449. VECcolumnMeans_preallocated (thy centroid.get(), part.get());
  450. MATsubtract_inplace (part.get(), thy centroid.get());
  451. SSCP_setNumberOfObservations (thee.get(), part.nrow);
  452. MATmtm_preallocated (thy data.get(), part.get()); // sum of squares and cross products = T'T
  453. for (integer j = 1; j <= part.ncol; j ++) {
  454. conststring32 label = my columnLabels [colb - 1 + j].get();
  455. TableOfReal_setColumnLabel (thee.get(), j, label);
  456. TableOfReal_setRowLabel (thee.get(), j, label);
  457. }
  458. return thee;
  459. } catch (MelderError) {
  460. Melder_throw (me, U": SSCP not created.");
  461. }
  462. }
  463. autoTableOfReal SSCP_TableOfReal_extractDistanceQuantileRange (SSCP me, TableOfReal thee, double qlow, double qhigh) {
  464. try {
  465. autoCovariance cov = SSCP_to_Covariance (me, 1);
  466. autoTableOfReal him = Covariance_TableOfReal_extractDistanceQuantileRange (cov.get(), thee, qlow, qhigh);
  467. return him;
  468. } catch (MelderError) {
  469. Melder_throw (me, U": no distance quantile ranges created.");
  470. }
  471. }
  472. autoTableOfReal Covariance_TableOfReal_mahalanobis (Covariance me, TableOfReal thee, bool useTableCentroid) {
  473. try {
  474. autoTableOfReal him = TableOfReal_create (thy numberOfRows, 1);
  475. /*
  476. ppgb
  477. ik kom er niet achter of onderstaande de hele vector kopieert of niet;
  478. in elk geval zijn hier enkele asserts nodig
  479. */
  480. autoVEC centroid = VECcopy (my centroid.get());
  481. autoMAT covari = matrixcopy (my data.get());
  482. /*
  483. Mahalanobis distance calculation. S = L.L' -> S**-1 = L**-1' . L**-1
  484. (x-m)'S**-1 (x-m) = (x-m)'L**-1' . L**-1. (x-m) =
  485. (L**-1.(x-m))' . (L**-1.(x-m))
  486. Get inverse of covari in lower triangular part.
  487. */
  488. double lndet;
  489. MATlowerCholeskyInverse_inplace (covari.get(), & lndet);
  490. if (useTableCentroid) {
  491. for (integer icol = 1; icol <= thy numberOfColumns; icol ++) {
  492. double mean = 0.0;
  493. for (integer irow = 1; irow <= thy numberOfRows; irow ++) {
  494. mean += thy data [irow] [icol];
  495. }
  496. centroid [icol] = mean / thy numberOfRows;
  497. }
  498. }
  499. for (integer k = 1; k <= thy numberOfRows; k ++) {
  500. his data [k] [1] = sqrt (NUMmahalanobisDistance (covari.get(), thy data.row(k), centroid.get()));
  501. if (thy rowLabels [k]) {
  502. TableOfReal_setRowLabel (him.get(), k, thy rowLabels [k].get());
  503. }
  504. }
  505. TableOfReal_setColumnLabel (him.get(), 1, U"d");
  506. return him;
  507. } catch (MelderError) {
  508. Melder_throw (me, U"no Mahalanobis distances created.");
  509. }
  510. }
  511. autoTableOfReal Covariance_TableOfReal_extractDistanceQuantileRange (Covariance me, TableOfReal thee, double qlow, double qhigh) {
  512. try {
  513. autoTableOfReal him = Covariance_TableOfReal_mahalanobis (me, thee, false);
  514. double low = TableOfReal_getColumnQuantile (him.get(), 1, qlow);
  515. double high = TableOfReal_getColumnQuantile (him.get(), 1, qhigh);
  516. // Count the number filtered.
  517. // nsel = (qhigh - qlow) * nrows is sometimes one off
  518. integer nsel = 0;
  519. for (integer i = 1; i <= thy numberOfRows; i ++) {
  520. if (low <= his data [i] [1] && his data [i] [1] < high) {
  521. nsel ++;
  522. }
  523. }
  524. Melder_require (nsel > 0, U"Not enough data in quantile interval.");
  525. autoTableOfReal r = TableOfReal_create (nsel, thy numberOfColumns);
  526. r -> columnLabels. copyElementsFrom (thy columnLabels.get());
  527. integer k = 0;
  528. for (integer i = 1; i <= thy numberOfRows; i ++) {
  529. if (low <= his data [i] [1] && his data [i] [1] < high) {
  530. k ++;
  531. TableOfReal_copyOneRowWithLabel (thee, r.get(), i, k);
  532. }
  533. }
  534. return r;
  535. } catch (MelderError) {
  536. Melder_throw (U"TableOfReal with distance quantile range not created.");
  537. }
  538. }
  539. autoCovariance TableOfReal_to_Covariance (TableOfReal me) {
  540. try {
  541. autoSSCP sscp = TableOfReal_to_SSCP (me, 0, 0, 0, 0);
  542. autoCovariance thee = SSCP_to_Covariance (sscp.get(), 1);
  543. return thee;
  544. } catch (MelderError) {
  545. Melder_throw (me, U": covariances not created.");
  546. }
  547. }
  548. autoCorrelation TableOfReal_to_Correlation (TableOfReal me) {
  549. try {
  550. autoSSCP sscp = TableOfReal_to_SSCP (me, 0, 0, 0, 0);
  551. autoCorrelation thee = SSCP_to_Correlation (sscp.get());
  552. return thee;
  553. } catch (MelderError) {
  554. Melder_throw (me, U": correlations not created.");
  555. }
  556. }
  557. autoCorrelation TableOfReal_to_Correlation_rank (TableOfReal me) {
  558. try {
  559. autoTableOfReal t = TableOfReal_rankColumns (me);
  560. autoCorrelation thee = TableOfReal_to_Correlation (t.get());
  561. return thee;
  562. } catch (MelderError) {
  563. Melder_throw (me, U": rank correlations not created.");
  564. }
  565. }
  566. autoSSCPList TableOfReal_to_SSCPList_byLabel (TableOfReal me) {
  567. try {
  568. autoSSCPList thee = SSCPList_create ();
  569. autoTableOfReal mew = TableOfReal_sortOnlyByRowLabels (me);
  570. Melder_warningOff ();
  571. integer lastrow = 0, numberOfMatrices = 0, numberOfSingularMatrices = 0, index = 1;
  572. conststring32 label = mew -> rowLabels [1].get();
  573. for (integer i = 2; i <= my numberOfRows; i ++) {
  574. integer numberOfRowsInCurrent = 0;
  575. conststring32 currentLabel = mew -> rowLabels [i].get();
  576. if (Melder_cmp (currentLabel, label) != 0) {
  577. // current label different from previous one(s)
  578. numberOfRowsInCurrent = i - index;
  579. lastrow = i - 1;
  580. } else if (i == my numberOfRows) {
  581. // current (last) label is same as previous
  582. numberOfRowsInCurrent = i - index + 1;
  583. lastrow = i;
  584. } else {
  585. // next one
  586. continue;
  587. }
  588. // We found a new group
  589. numberOfMatrices ++;
  590. if (numberOfRowsInCurrent > 1) { // We need at least two rows for an SSCP
  591. if (numberOfRowsInCurrent < my numberOfColumns) {
  592. numberOfSingularMatrices ++;
  593. }
  594. autoSSCP t = TableOfReal_to_SSCP (mew.get(), index, lastrow, 0, 0);
  595. if (! (label = mew -> rowLabels [index].get())) {
  596. label = U"?";
  597. }
  598. Thing_setName (t.get(), label);
  599. thy addItem_move (t.move());
  600. }
  601. label = currentLabel;
  602. index = i;
  603. }
  604. if (lastrow != my numberOfRows) {
  605. numberOfMatrices ++;
  606. }
  607. Melder_warningOn ();
  608. if (numberOfSingularMatrices > 0 || thy size != numberOfMatrices) {
  609. integer notIncluded = numberOfMatrices - thy size;
  610. Melder_warning (numberOfMatrices, U" different groups detected: ", numberOfSingularMatrices + notIncluded,
  611. U" group(s) with less rows than columns (of which ", notIncluded, U" with only one row).");
  612. }
  613. return thee;
  614. } catch (MelderError) {
  615. Melder_throw (me, U": SSCP not created from labels.");
  616. }
  617. }
  618. autoPCA SSCP_to_PCA (SSCP me) {
  619. try {
  620. autoMAT mat = MATzero (my numberOfColumns, my numberOfColumns);
  621. autoPCA thee = PCA_create (my numberOfColumns, my numberOfColumns);
  622. if (my numberOfRows == 1) { // 1xn matrix -> nxn
  623. for (integer i = 1; i <= my numberOfColumns; i ++)
  624. mat [i] [i] = my data [1] [i];
  625. } else {
  626. matrixcopy_preallocated (mat.get(), my data.get());
  627. }
  628. thy labels. copyElementsFrom_upTo (my columnLabels.get(), my numberOfColumns);
  629. Eigen_initFromSymmetricMatrix (thee.get(), mat.get());
  630. VECcopy_preallocated (thy centroid.get(), my centroid.get());
  631. PCA_setNumberOfObservations (thee.get(), Melder_ifloor (my numberOfObservations));
  632. return thee;
  633. } catch (MelderError) {
  634. Melder_throw (me, U": PCA not created.");
  635. }
  636. }
  637. void SSCP_setValue (SSCP me, integer rowNumber, integer columnNumber, double value) {
  638. checkColumnNumber (columnNumber, my numberOfColumns);
  639. checkRowNumber (rowNumber, my numberOfRows);
  640. Melder_require (! (rowNumber == columnNumber && value <= 0.0), U"Diagonal element should always be a positive number.");
  641. if (my numberOfRows == 1) { // diagonal
  642. Melder_require (rowNumber == columnNumber, U"Row and column number should be equal for a diagonal matrix.");
  643. my data [1] [rowNumber] = value;
  644. } else {
  645. Melder_require (! (rowNumber != columnNumber && (fabs (value) > my data [rowNumber] [rowNumber] || fabs (value) > my data [columnNumber] [columnNumber])),
  646. U"The off-diagonal values should not be larger than the diagonal values. Input diagonal elements first, or change this value.");
  647. my data [rowNumber] [columnNumber] = my data [columnNumber] [rowNumber] = value;
  648. }
  649. }
  650. void SSCP_setCentroid (SSCP me, integer component, double value) {
  651. Melder_require (component > 0 && component <= my numberOfColumns, U"Component number should not exceed ", my numberOfColumns, U".");
  652. my centroid [component] = value;
  653. }
  654. autoCCA SSCP_to_CCA (SSCP me, integer ny) {
  655. try {
  656. char upper = 'L', diag = 'N';
  657. integer info;
  658. Melder_require (ny > 0 && ny < my numberOfRows, U"Invalid split.");
  659. Melder_require (my numberOfRows > 1, U"Matrix should not be diagonal.");
  660. integer m = my numberOfRows, nx = m - ny, xy_interchanged = nx < ny, yof = 0, xof = ny;
  661. if (xy_interchanged) {
  662. yof = ny; xof = 0;
  663. nx = ny; ny = m - nx;
  664. }
  665. // Copy Syy and Sxx into upper part of syy and sxx matrices.
  666. autoMAT syy = MATraw (ny, ny);
  667. for (integer i = 1; i <= ny; i ++) {
  668. for (integer j = i; j <= ny; j ++) {
  669. syy [i] [j] = my data [yof + i] [yof + j];
  670. }
  671. }
  672. autoMAT sxx = MATraw (nx, nx);
  673. for (integer i = 1; i <= nx; i ++) {
  674. for (integer j = i; j <= nx; j ++) {
  675. sxx [i] [j] = my data [xof + i] [xof + j];
  676. }
  677. }
  678. autoMAT syx = MATraw (ny, nx);
  679. for (integer i = 1; i <= nx; i ++) {
  680. for (integer j = 1; j <= ny; j ++) {
  681. syx [i] [j] = my data [yof + i] [xof + j];
  682. }
  683. }
  684. // Cholesky decomposition: Syy = Uy'*Uy and Sxx = Ux'*Ux.
  685. // (Pretend as if colum-major storage)
  686. (void) NUMlapack_dpotf2 (& upper, & ny, & syy [1] [1], & ny, & info);
  687. if (info != 0) Melder_throw (U"The leading minor of order ", info, U" is not positive definite, and the "
  688. U"factorization of Syy could not be completed.");
  689. (void) NUMlapack_dpotf2 (& upper, & nx, & sxx [1] [1], & nx, & info);
  690. if (info != 0) Melder_throw (U"The leading minor of order ", info, U" is not positive definite, and the "
  691. U"factorization of Sxx could not be completed.");
  692. /*
  693. With Cholesky decomps Sxx = Ux'* Ux, Syy = Uy * Uy'
  694. Sxx**-1 = Uxi * Uxi' and Syy**-1 = Uyi * Uyi', where
  695. Uxi = Ux**-1 and Uyi = Uy**-1, the equations
  696. (1) (Syx * Sxx**-1 * Syx' - lambda Syy) y = 0
  697. (1') (Syx' * Syy**-1 * Syx - lambda Sxx) x = 0
  698. can be written as:
  699. (2) (Syx * Uxi * Uxi' * Syx' - lambda Uy' * Uy) y = 0
  700. (2') (Syx' * Uyi * Uyi' * Syx - lambda Ux' * Ux) x = 0
  701. More explicitly as:
  702. (3) (Uxi' * Syx')' * (Uxi' * Syx') - lambda Uy' * Uy) y = 0
  703. (3') (Uyi' * Syx )' * (Uyi' * Syx ) - lambda Ux' * Ux) x = 0
  704. They are now in the form (A'A - lambda B'B) x = 0 and both can be solved with the GSVD.
  705. However, these equations are not independent. Both have the same
  706. eigenvalues and given the eigenvectors for one, the eigenvectors for
  707. the other can be calculated.
  708. If nx >= ny use eq. (3)
  709. GSVD (Uxi' * Syx', Uy) gives lambda's and y.
  710. To get x multiply (1) from the left by Syx'*Syy**-1
  711. (4) (Syx'*Syy**-1*Syx * Sxx**-1 - lambda ) Syx' * y = 0
  712. Split off Sxx**-1
  713. (5) (Syx'*Syy**-1*Syx -lambda Sxx) * Sxx**-1 * Syx' * y = 0
  714. It follows that x = Sxx**-1 * Syx' * y = Uxi * Uxi' * Sxy * y
  715. If ny > nx use eq. (3')
  716. We switch the role of x and y.
  717. */
  718. //autoNUMmatrix<double> a (1, nx, 1, ny);
  719. // Uxi = inverse(Ux)
  720. (void) NUMlapack_dtrti2 (& upper, & diag, & nx, & sxx [1] [1], & nx, & info);
  721. Melder_require (info == 0, U"Error in inverse for Sxx.");
  722. // Prepare Uxi' * Syx' = (Syx * Uxi)'
  723. autoMAT a = MATmul_tt (sxx.get(), syx.get());
  724. Melder_assert (a.nrow == nx && a.ncol == ny);
  725. autoGSVD gsvd = GSVD_create_d (a.get(), syy.get());
  726. autoMAT ri = MATcopy (gsvd -> r.get());
  727. autoCCA thee = Thing_new (CCA);
  728. thy y = Eigen_create (gsvd -> numberOfColumns, gsvd -> numberOfColumns);
  729. thy x = Eigen_create (thy y -> numberOfEigenvalues, nx);
  730. // Get X=Q*R**-1
  731. (void) NUMlapack_dtrti2 (& upper, & diag, & gsvd -> numberOfColumns, & ri [1] [1], & gsvd -> numberOfColumns, & info);
  732. Melder_require (info == 0, U"Error in inverse for R.");
  733. for (integer i = 1; i <= gsvd -> numberOfColumns; i ++) {
  734. double t = gsvd -> d1 [i] / gsvd -> d2 [i];
  735. thy y -> eigenvalues [i] = t * t;
  736. for (integer j = 1; j <= gsvd -> numberOfColumns; j ++) {
  737. t = 0.0;
  738. for (integer k = 1; k <= j; k ++) {
  739. t += gsvd -> q [i] [k] * ri [k] [j];
  740. }
  741. thy y -> eigenvectors [j] [i] = t;
  742. }
  743. }
  744. MATnormalizeRows_inplace (thy y -> eigenvectors.get(), 2.0, 1.0);
  745. thy numberOfCoefficients = thy y -> numberOfEigenvalues;
  746. thy numberOfObservations = Melder_ifloor (my numberOfObservations);
  747. // x = Sxx**-1 * Syx' * y
  748. for (integer i = 1; i <= thy numberOfCoefficients; i ++) {
  749. double *evecy = thy y -> eigenvectors [i];
  750. double *evecx = thy x -> eigenvectors [i];
  751. for (integer j = 1; j <= nx; j ++) {
  752. double t = 0.0;
  753. for (integer k = j; k <= nx; k ++) {
  754. for (integer l = 1; l <= nx; l ++) {
  755. for (integer n = 1; n <= ny; n ++) {
  756. t += sxx [j] [k] * sxx [l] [k] * syx [n] [l] * evecy [n];
  757. }
  758. }
  759. }
  760. evecx [j] = t;
  761. }
  762. }
  763. MATnormalizeRows_inplace (thy x -> eigenvectors.get(), 2.0, 1.0);
  764. if (ny < nx) {
  765. autoEigen t = thy x.move();
  766. thy x = thy y.move(); thy y = t.move();
  767. }
  768. return thee;
  769. } catch (MelderError) {
  770. Melder_throw (me, U": CCA not created.");
  771. }
  772. }
  773. /************ SSCPList ***********************************************/
  774. autoSSCP SSCPList_to_SSCP_pool (SSCPList me) {
  775. try {
  776. autoSSCP thee = Data_copy (my at [1]);
  777. for (integer k = 2; k <= my size; k ++) {
  778. SSCP t = my at [k];
  779. Melder_require (t -> numberOfRows == thy numberOfRows, U"The dimension of item ", k, U" should agree.");
  780. thy numberOfObservations += t -> numberOfObservations;
  781. // Sum the sscp's and weigh the centroid.
  782. for (integer i = 1; i <= thy numberOfRows; i ++) { // if 1xn
  783. for (integer j = 1; j <= thy numberOfColumns; j ++) {
  784. thy data [i] [j] += t -> data [i] [j];
  785. }
  786. }
  787. for (integer j = 1; j <= thy numberOfRows; j ++) {
  788. thy centroid [j] += t -> numberOfObservations * t -> centroid [j];
  789. }
  790. }
  791. for (integer i = 1; i <= thy numberOfRows; i ++) {
  792. thy centroid [i] /= thy numberOfObservations;
  793. }
  794. return thee;
  795. } catch (MelderError) {
  796. Melder_throw (me, U": not pooled.");
  797. }
  798. }
  799. autoCovariance CovarianceList_to_Covariance_pool (CovarianceList me) { // Morrison sec 3.5, page 100
  800. try {
  801. autoCovariance thee = Data_copy (my at [1]);
  802. double scaleFactor = thy numberOfObservations - 1.0;
  803. for (integer i = 1; i <= thy numberOfRows; i ++) {
  804. for (integer j = 1; j <= thy numberOfColumns; j ++) {
  805. thy data [i] [j] *= scaleFactor;
  806. }
  807. }
  808. for (integer k = 2; k <= my size; k ++) {
  809. Covariance t = my at [k];
  810. Melder_require (t -> numberOfRows == thy numberOfRows, U"The dimension of item ", k, U" should agree.");
  811. thy numberOfObservations += t -> numberOfObservations;
  812. // Sum the sscp's and weigh the centroid.
  813. scaleFactor = t -> numberOfObservations - 1.0;
  814. for (integer i = 1; i <= thy numberOfRows; i ++) { // if 1xn
  815. for (integer j = 1; j <= thy numberOfColumns; j ++) {
  816. thy data [i] [j] += scaleFactor * t -> data [i] [j];
  817. }
  818. }
  819. for (integer j = 1; j <= thy numberOfRows; j ++) {
  820. thy centroid [j] += t -> numberOfObservations * t -> centroid [j];
  821. }
  822. }
  823. for (integer i = 1; i <= thy numberOfRows; i ++) {
  824. thy centroid [i] /= thy numberOfObservations;
  825. }
  826. scaleFactor = 1.0 / (thy numberOfObservations - my size);
  827. for (integer i = 1; i <= thy numberOfRows; i ++) { // if 1xn
  828. for (integer j = 1; j <= thy numberOfColumns; j ++) {
  829. thy data [i] [j] *= scaleFactor;
  830. }
  831. }
  832. return thee;
  833. } catch (MelderError) {
  834. Melder_throw (me, U": not pooled.");
  835. }
  836. }
  837. void SSCPList_getHomegeneityOfCovariances_box (SSCPList me, double *p_prob, double *p_chisq, double *p_df) {
  838. double chisq = 0.0, df = undefined;
  839. autoSSCP pooled = SSCPList_to_SSCP_pool (me);
  840. integer p = pooled -> numberOfColumns;
  841. double ln_determinant, inv = 0.0, sum = 0.0, g = my size;
  842. for (integer i = 1; i <= g; i ++) {
  843. SSCP t = my at [i];
  844. double ni = t -> numberOfObservations - 1.0;
  845. ln_determinant = MATdeterminant_fromSymmetricMatrix (t -> data.get());
  846. // Box-test is for covariance matrices -> scale determinant.
  847. ln_determinant -= p * log (ni);
  848. sum += ni;
  849. inv += 1.0 / ni;
  850. chisq -= ni * ln_determinant;
  851. }
  852. ln_determinant = MATdeterminant_fromSymmetricMatrix (pooled -> data.get());
  853. ln_determinant -= p * log (pooled -> numberOfObservations - g);
  854. chisq += sum * ln_determinant;
  855. chisq *= 1.0 - (inv - 1.0 / sum) * (2.0 * p * p + 3.0 * p - 1.0) / (6.0 * (p + 1) * (g - 1.0));
  856. df = (g - 1.0) * p * (p + 1) / 2.0;
  857. if (p_prob) {
  858. *p_prob = NUMchiSquareQ (chisq, df);
  859. }
  860. if (p_chisq) {
  861. *p_chisq = chisq;
  862. }
  863. if (p_df) {
  864. *p_df = df;
  865. }
  866. }
  867. autoSSCPList SSCPList_toTwoDimensions (SSCPList me, constVEC v1, constVEC v2) {
  868. try {
  869. autoSSCPList thee = SSCPList_create ();
  870. for (integer i = 1; i <= my size; i ++) {
  871. autoSSCP t = SSCP_toTwoDimensions (my at [i], v1, v2);
  872. Thing_setName (t.get(), Thing_getName (my at [i]));
  873. thy addItem_move (t.move());
  874. }
  875. return thee;
  876. } catch (MelderError) {
  877. Melder_throw (me, U": not reduced to two dimensions.");
  878. }
  879. }
  880. void SSCPList_drawConcentrationEllipses (SSCPList me, Graphics g,
  881. double scale, bool confidence, conststring32 label,
  882. integer d1, integer d2, double xmin, double xmax, double ymin, double ymax,
  883. int fontSize, bool garnish)
  884. {
  885. SSCP t = my at [1];
  886. Melder_require (d1 > 0 && d1 <= t -> numberOfColumns && d2 > 0 && d2 <= t -> numberOfColumns && d1 != d2, U"Incorrect axes.");
  887. autoSSCPList thee = SSCPList_extractTwoDimensions (me, d1, d2);
  888. /*
  889. Autowindowing.
  890. */
  891. if (xmin == xmax || ymin == ymax) {
  892. double boundingBox_xmin, boundingBox_xmax, boundingBox_ymin, boundingBox_ymax;
  893. SSCPList_getEllipsesBoundingBoxCoordinates (thee.get(), scale, confidence,
  894. & boundingBox_xmin, & boundingBox_xmax, & boundingBox_ymin, & boundingBox_ymax);
  895. if (xmin == xmax) {
  896. xmin = boundingBox_xmin;
  897. xmax = boundingBox_xmax;
  898. }
  899. if (ymin == ymax) {
  900. ymin = boundingBox_ymin;
  901. ymax = boundingBox_ymax;
  902. }
  903. }
  904. Graphics_setWindow (g, xmin, xmax, ymin, ymax);
  905. Graphics_setInner (g);
  906. for (integer i = 1; i <= thy size; i ++) {
  907. t = thy at [i];
  908. double lscale = SSCP_getEllipseScalefactor (t, scale, confidence);
  909. if (lscale < 0.0) {
  910. continue;
  911. }
  912. if (! label || Melder_cmp (label, Thing_getName (t)) == 0) {
  913. _SSCP_drawTwoDimensionalEllipse (t, g, lscale, fontSize);
  914. }
  915. }
  916. Graphics_unsetInner (g);
  917. if (garnish) {
  918. t = my at [1];
  919. Graphics_drawInnerBox (g);
  920. Graphics_marksLeft (g, 2, true, true, false);
  921. Graphics_textLeft (g, true, t -> columnLabels [d2] ? t -> columnLabels [d2].get() : Melder_cat (U"Dimension ", d2));
  922. Graphics_marksBottom (g, 2, true, true, false);
  923. Graphics_textBottom (g, true, t -> columnLabels [d1] ? t -> columnLabels [d1].get() : Melder_cat (U"Dimension ", d1));
  924. }
  925. }
  926. autoTableOfReal SSCP_to_TableOfReal (SSCP me) {
  927. try {
  928. autoTableOfReal thee = Thing_new (TableOfReal);
  929. my structTableOfReal :: v_copy (thee.get());
  930. return thee;
  931. } catch (MelderError) {
  932. Melder_throw (me, U": not copied.");
  933. }
  934. }
  935. autoTableOfReal SSCP_extractCentroid (SSCP me) {
  936. try {
  937. autoTableOfReal thee = TableOfReal_create (1, my numberOfColumns);
  938. VECcopy_preallocated (thy data.row(1), my centroid.get());
  939. thy columnLabels. copyElementsFrom (my columnLabels.get());
  940. return thee;
  941. } catch (MelderError) {
  942. Melder_throw (me, U": centroid not extracted.");
  943. }
  944. }
  945. autoCovariance Covariance_create (integer dimension) {
  946. try {
  947. autoCovariance me = Thing_new (Covariance);
  948. SSCP_init (me.get(), dimension, dimension);
  949. return me;
  950. } catch (MelderError) {
  951. Melder_throw (U"Covariance not created.");
  952. }
  953. }
  954. autoCovariance Covariance_create_reduceStorage (integer dimension, integer storage) {
  955. try {
  956. autoCovariance me = Thing_new (Covariance);
  957. if (storage <= 0 || storage >= dimension) {
  958. storage = dimension;
  959. }
  960. SSCP_init (me.get(), dimension, storage);
  961. return me;
  962. } catch (MelderError) {
  963. Melder_throw (U"Reduced storage covariance not created.");
  964. }
  965. }
  966. autoCovariance Covariance_createSimple (conststring32 s_covariances, conststring32 s_centroid, integer numberOfObservations) {
  967. try {
  968. autoVEC centroid = VEC_createFromString (s_centroid);
  969. autoVEC covariances = VEC_createFromString (s_covariances);
  970. integer numberOfCovariances_wanted = centroid.size * (centroid.size + 1) / 2;
  971. Melder_require (covariances.size == numberOfCovariances_wanted,
  972. U"The number of covariance matrix elements and the number of centroid elements (d) should conform. "
  973. "There should be d(d+1)/2 covariance values and d centroid values.");
  974. autoCovariance me = Covariance_create (centroid.size);
  975. // Construct the full covariance matrix from the upper-diagonal elements
  976. integer rowNumber = 1;
  977. for (integer inum = 1; inum <= covariances.size; inum ++) {
  978. integer nmissing = (rowNumber - 1) * rowNumber / 2;
  979. integer inumc = inum + nmissing;
  980. rowNumber = (inumc - 1) / centroid.size + 1;
  981. integer icol = ((inumc - 1) % centroid.size) + 1;
  982. my data [rowNumber] [icol] = my data [icol] [rowNumber] = covariances [inum];
  983. if (icol == centroid.size) {
  984. rowNumber ++;
  985. }
  986. }
  987. // Check if a valid covariance, first check variances then covariances
  988. for (integer irow = 1; irow <= centroid.size; irow ++) {
  989. Melder_require (my data [irow] [irow] > 0.0, U"The diagonal matrix elements should all be positive numbers.");
  990. }
  991. for (integer irow = 1; irow <= centroid.size; irow ++) {
  992. for (integer icol = irow + 1; icol <= centroid.size; icol ++) {
  993. if (fabs (my data [irow] [icol] / sqrt (my data [irow] [irow] * my data [icol] [icol])) > 1) {
  994. integer nmissing = (irow - 1) * irow / 2;
  995. integer inum = (irow - 1) * centroid.size + icol - nmissing;
  996. Melder_throw (U"The covariance in cell [", irow, U",", icol, U"], i.e. input item ", inum, U" is too large.");
  997. }
  998. }
  999. }
  1000. for (integer inum = 1; inum <= centroid.size; inum ++) {
  1001. my centroid [inum] = centroid [inum];
  1002. }
  1003. my numberOfObservations = numberOfObservations;
  1004. return me;
  1005. } catch (MelderError) {
  1006. Melder_throw (U"Simple Covariance not created.");
  1007. }
  1008. }
  1009. autoCorrelation Correlation_createSimple (conststring32 s_correlations, conststring32 s_centroid, integer numberOfObservations) {
  1010. try {
  1011. autoVEC centroids = VEC_createFromString (s_centroid);
  1012. autoVEC correlations = VEC_createFromString (s_correlations);
  1013. integer numberOfCorrelations_wanted = centroids.size * (centroids.size + 1) / 2;
  1014. Melder_require (correlations.size == numberOfCorrelations_wanted,
  1015. U"The number of correlation matrix elements and the number of centroid elements should agree. "
  1016. "There should be d(d+1)/2 correlation values and d centroid values.");
  1017. autoCorrelation me = Correlation_create (centroids.size);
  1018. // Construct the full correlation matrix from the upper-diagonal elements
  1019. integer rowNumber = 1;
  1020. for (integer inum = 1; inum <= correlations.size; inum ++) {
  1021. integer nmissing = (rowNumber - 1) * rowNumber / 2;
  1022. integer inumc = inum + nmissing;
  1023. rowNumber = (inumc - 1) / centroids.size + 1;
  1024. integer icol = ( (inumc - 1) % centroids.size) + 1;
  1025. my data [rowNumber] [icol] = my data [icol] [rowNumber] = correlations [inum];
  1026. if (icol == centroids.size) {
  1027. rowNumber ++;
  1028. }
  1029. }
  1030. // Check if a valid correlations, first check diagonal then off-diagonals
  1031. for (integer irow = 1; irow <= centroids.size; irow ++) {
  1032. Melder_require (my data [irow] [irow] == 1.0, U"The diagonal matrix elements should all equal 1.0.");
  1033. }
  1034. for (integer irow = 1; irow <= centroids.size; irow ++) {
  1035. for (integer icol = irow + 1; icol <= centroids.size; icol ++) {
  1036. if (fabs (my data [irow] [icol]) > 1) {
  1037. integer nmissing = (irow - 1) * irow / 2;
  1038. integer inum = (irow - 1) * centroids.size + icol - nmissing;
  1039. Melder_throw (U"The correlation in cell [", irow, U",", icol, U"], i.e. input item ", inum, U" should not exceed 1.0.");
  1040. }
  1041. }
  1042. }
  1043. for (integer inum = 1; inum <= centroids.size; inum ++) {
  1044. my centroid [inum] = centroids [inum];
  1045. }
  1046. my numberOfObservations = numberOfObservations;
  1047. return me;
  1048. } catch (MelderError) {
  1049. Melder_throw (U"Simple Correlation not created.");
  1050. }
  1051. }
  1052. autoCorrelation Correlation_create (integer dimension) {
  1053. try {
  1054. autoCorrelation me = Thing_new (Correlation);
  1055. SSCP_init (me.get(), dimension, dimension);
  1056. return me;
  1057. } catch (MelderError) {
  1058. Melder_throw (U"Correlation not created.");
  1059. }
  1060. }
  1061. autoCovariance SSCP_to_Covariance (SSCP me, integer numberOfConstraints) {
  1062. try {
  1063. Melder_assert (numberOfConstraints >= 0);
  1064. autoCovariance thee = Thing_new (Covariance);
  1065. my structSSCP :: v_copy (thee.get());
  1066. for (integer irow = 1; irow <= my numberOfRows; irow ++) {
  1067. for (integer icol = irow; icol <= my numberOfColumns; icol ++) { // a covariance matrix is symmetric
  1068. thy data [icol] [irow] = thy data [irow] [icol] /= my numberOfObservations - numberOfConstraints;
  1069. }
  1070. }
  1071. return thee;
  1072. } catch (MelderError) {
  1073. Melder_throw (me, U"; Covariance not created.");
  1074. }
  1075. }
  1076. autoSSCP Covariance_to_SSCP (Covariance me) {
  1077. try {
  1078. autoSSCP thee = Thing_new (SSCP);
  1079. my structSSCP :: v_copy (thee.get());
  1080. for (integer irow = 1; irow <= my numberOfRows; irow ++) {
  1081. for (integer icol = irow; icol <= my numberOfColumns; icol ++) {
  1082. thy data [icol] [irow] = thy data [irow] [icol] *= my numberOfObservations - 1;
  1083. }
  1084. }
  1085. return thee;
  1086. } catch (MelderError) {
  1087. Melder_throw (me, U": SSCP not created.");
  1088. }
  1089. }
  1090. autoCorrelation SSCP_to_Correlation (SSCP me) {
  1091. try {
  1092. autoCorrelation thee = Thing_new (Correlation);
  1093. my structSSCP :: v_copy (thee.get());
  1094. for (integer i = 1; i <= my numberOfRows; i ++) {
  1095. for (integer j = i; j <= my numberOfColumns; j ++) {
  1096. thy data [j] [i] = thy data [i] [j] /= sqrt (my data [i] [i] * my data [j] [j]);
  1097. }
  1098. }
  1099. return thee;
  1100. } catch (MelderError) {
  1101. Melder_throw (me, U": Correlation not created.");
  1102. }
  1103. }
  1104. double SSCP_getLnDeterminant (SSCP me) {
  1105. try {
  1106. return MATdeterminant_fromSymmetricMatrix (my data.get());
  1107. } catch (MelderError) {
  1108. return undefined;
  1109. }
  1110. }
  1111. static autoCovariance Covariances_pool (Covariance me, Covariance thee) {
  1112. try {
  1113. Melder_require (my numberOfRows == thy numberOfRows && my numberOfColumns == thy numberOfColumns,
  1114. U"Matrices should have equal dimensions.");
  1115. autoSSCPList sscps = SSCPList_create ();
  1116. autoSSCP sscp1 = Covariance_to_SSCP (me);
  1117. sscps -> addItem_move (sscp1.move());
  1118. autoSSCP sscp2 = Covariance_to_SSCP (thee);
  1119. sscps -> addItem_move (sscp2.move());
  1120. autoSSCP pool = SSCPList_to_SSCP_pool (sscps.get());
  1121. autoCovariance him = SSCP_to_Covariance (pool.get(), 2);
  1122. return him;
  1123. } catch (MelderError) {
  1124. Melder_throw (me, U"not pooled.");
  1125. }
  1126. }
  1127. static double traceOfSquaredMatrixProduct (double **s1, double **s2, integer n) {
  1128. // tr ((s1*s2)^2), s1, s2 are symmetric
  1129. autoMAT m = MATmul ({s1,n,n}, {s2,n, n});
  1130. double trace2 = NUMtrace2_nn (m.get(), m.get());
  1131. return trace2;
  1132. }
  1133. double Covariance_getProbabilityAtPosition_string (Covariance me, conststring32 vector_string) {
  1134. autostring32vector vector = STRVECtokenize (vector_string);
  1135. autoVEC v = VECzero (my numberOfColumns);
  1136. for (integer i = 1; i <= vector.size; i ++) {
  1137. v [i] = Melder_atof (vector [i].get());
  1138. if (i == my numberOfColumns)
  1139. break;
  1140. }
  1141. double p = Covariance_getProbabilityAtPosition (me, v.get());
  1142. return p;
  1143. }
  1144. double Covariance_getProbabilityAtPosition (Covariance me, constVEC x) {
  1145. Melder_require (x.size == my numberOfColumns,
  1146. U"The dimensions of the Covariance and the vector should agree.");
  1147. if (NUMisEmpty (my lowerCholesky.get()))
  1148. SSCP_expandLowerCholesky (me);
  1149. double ln2pid = my numberOfColumns * log (NUM2pi);
  1150. double dsq = NUMmahalanobisDistance (my lowerCholesky.get(), x, my centroid.get());
  1151. double lnN = - 0.5 * (ln2pid + my lnd + dsq);
  1152. double p = exp (lnN);
  1153. return p;
  1154. }
  1155. double Covariance_getMarginalProbabilityAtPosition (Covariance me, constVEC vector, double x) {
  1156. double mu, stdev;
  1157. Covariance_getMarginalDensityParameters (me, vector, &mu, &stdev);
  1158. double dx = (x - mu) / stdev;
  1159. double p = (NUM1_sqrt2pi / stdev) * exp (- 0.5 * dx * dx);
  1160. return p;
  1161. }
  1162. /* Precondition ||v|| = 1 */
  1163. void Covariance_getMarginalDensityParameters (Covariance me, constVEC v, double *p_mu, double *p_stdev) {
  1164. Melder_assert (v.size == my numberOfColumns);
  1165. if (p_mu) {
  1166. longdouble mu = 0.0;
  1167. for (integer m = 1; m <= my numberOfColumns; m ++) {
  1168. mu += v [m] * my centroid [m];
  1169. }
  1170. *p_mu = (double) mu;
  1171. }
  1172. if (p_stdev) {
  1173. longdouble stdev = 0.0;
  1174. if (my numberOfRows == 1) { // 1xn diagonal matrix
  1175. for (integer m = 1; m <= my numberOfColumns; m ++) {
  1176. stdev += v [m] * my data [1] [m] * v [m];
  1177. }
  1178. } else {
  1179. for (integer k = 1; k <= my numberOfRows; k ++) {
  1180. for (integer m = 1; m <= my numberOfColumns; m ++) {
  1181. stdev += v [k] * my data [k] [m] * v [m];
  1182. }
  1183. }
  1184. }
  1185. *p_stdev = sqrt (stdev);
  1186. }
  1187. }
  1188. double Covariances_getMultivariateCentroidDifference (Covariance me, Covariance thee, int equalCovariances, double *p_prob, double *p_fisher, double *p_df1, double *p_df2) {
  1189. integer p = my numberOfRows, N = Melder_ifloor (my numberOfObservations + thy numberOfObservations);
  1190. integer N1 = Melder_ifloor (my numberOfObservations), n1 = N1 - 1;
  1191. integer N2 = Melder_ifloor (thy numberOfObservations), n2 = N2 - 1;
  1192. double dif = undefined, fisher = undefined;
  1193. double df1 = p, df2 = N - p - 1;
  1194. Melder_require (df2 >= 1.0, U"Not enough observations (", N, U") for this test.");
  1195. Melder_require (p >= N1 && p >= N2, U"The number of observations should be larger than the number of variables.");
  1196. dif = 0;
  1197. for (integer i = 1; i <= p; i ++) {
  1198. double dist = my centroid [i] - thy centroid [i];
  1199. dif += dist * dist;
  1200. }
  1201. dif = sqrt (dif);
  1202. if (equalCovariances) {
  1203. // Morrison, page 141
  1204. autoCovariance pool = Covariances_pool (me, thee);
  1205. Melder_assert (my data.ncol == p); // ppgb 20180913
  1206. autoMAT s = matrixcopy (my data.get());
  1207. double lndet;
  1208. MATlowerCholeskyInverse_inplace (s.get(), & lndet);
  1209. double mahalanobis = NUMmahalanobisDistance (s.get(), my centroid.get(), thy centroid.get());
  1210. double hotelling_tsq = mahalanobis * N1 * N2 / N;
  1211. fisher = hotelling_tsq * df2 / ( (N - 2) * df1);
  1212. } else {
  1213. /* Krishnamoorthy-Yu (2004): Modified Nel and Van der Merwe test
  1214. Hotelling t^2 = (x1-x2)'*S^-1*(x1 -x2) follows nu*p*Fisher(p,nu-p+1)/(nu-p+1)
  1215. Approximate number of degrees of freedom (their formula 7, page 164)
  1216. nu = (p+p^2)/((1/n1)(tr (S1*S^-1)^2 + (tr(S1*S^-1))^2)) +(1/n2)(tr (S2*S^-1)^2 + (tr(S2*S^-1))^2)))
  1217. the matrices S1 and S2 are the covariance matrices 'my data' and 'thy data' divided by N1 and N2 respectively.
  1218. S is the pooled covar divided by N.
  1219. */
  1220. autoMAT s1 = MATraw (p, p), s2 = MATraw (p, p), s = MATraw (p, p);
  1221. for (integer i = 1; i <= p; i ++) {
  1222. for (integer j = 1; j <= p; j ++) {
  1223. s1 [i] [j] = my data [i] [j] / my numberOfObservations;
  1224. s2 [i] [j] = thy data [i] [j] / thy numberOfObservations;
  1225. s [i] [j] = s1 [i] [j] + s2 [i] [j];
  1226. }
  1227. }
  1228. double lndet;
  1229. MATlowerCholeskyInverse_inplace (s.get(), & lndet);
  1230. // Krishan... formula 2, page 162
  1231. double hotelling_tsq = NUMmahalanobisDistance (s.get(), my centroid.get(), thy centroid.get());
  1232. autoMAT si = MATinverse_fromLowerCholeskyInverse (s.get());
  1233. double tr_s1sisqr = traceOfSquaredMatrixProduct (s1.at, si.at, p);
  1234. double tr_s1si = NUMtrace2_nn (s1.get(), si.get());
  1235. double tr_s2sisqr = traceOfSquaredMatrixProduct (s2.at, si.at, p);
  1236. double tr_s2si = NUMtrace2_nn (s2.get(), si.get());
  1237. double nu = (p + p * p) / ( (tr_s1sisqr + tr_s1si * tr_s1si) / n1 + (tr_s2sisqr + tr_s2si * tr_s2si) / n2);
  1238. df2 = nu - p + 1;
  1239. fisher = hotelling_tsq * (nu - p + 1) / (nu * p);
  1240. }
  1241. if (p_prob) {
  1242. *p_prob = NUMfisherQ (fisher, df1, df2);
  1243. }
  1244. if (p_fisher) {
  1245. *p_fisher = fisher;
  1246. }
  1247. if (p_df1) {
  1248. *p_df1 = df1;
  1249. }
  1250. if (p_df2) {
  1251. *p_df2 = df2;
  1252. }
  1253. return dif;
  1254. }
  1255. /* Schott 2001 */
  1256. void Covariances_equality (CovarianceList me, int method, double *p_prob, double *p_chisq, double *p_df) {
  1257. try {
  1258. integer numberOfMatrices = my size;
  1259. double chisq = undefined, df = undefined;
  1260. Melder_require (numberOfMatrices > 1, U"We need at least two matrices");
  1261. autoCovariance pool = CovarianceList_to_Covariance_pool (me);
  1262. double ns = pool -> numberOfObservations - my size;
  1263. integer p = pool -> numberOfColumns;
  1264. if (method == 1) {
  1265. /* Bartlett (see Morrison page 297)
  1266. The hypothesis H0 : Sigma [1] = .... = Sigma [k] of the equality of the covariance matrices of k p-dimensional
  1267. multinormal populations can be tested against the alternative by a modified generalized likelihood-ratio statistic.
  1268. Let S [i] be the unbiased estimate of Sigma [i] based on n [i] degrees of freedom, where n [i] = N [i]-1 for
  1269. the usual case of a random sample of N [i] observation vectors from the i-th population. When H0 is true
  1270. S = 1/(sum(i=1..k, n [i])) sum(i=1..k, n [i]*S [i])
  1271. is the pooled estimate of the common covariance matrix. The test statistic is
  1272. M = sum(i=1..k,n [i])*ln|S| - sum(i=1..k, n [i]*ln|S [i]|).
  1273. Box (1949), "A general distribution theory for a class of likelihood criteria",
  1274. Biomerika, vol 36, pp. 317-346. has shown that if the scale factor
  1275. C^(-1) = 1 - (2p^2+3p-1)/(6(p+1)(k-1)) * (sum(i=1..k, 1/n [i]) - 1 / sum(i=1..k, n [i])) is introduced,
  1276. the quatity M/C is approximately distributed as a chi-squared variate with (k-1)p(p+1)/2 degrees of freedom
  1277. as the n [i] become large.
  1278. It is well known that this likelihood ratio test is very sensitive to violations of the normality assumption,
  1279. and so other more robust procedures have been proposed.
  1280. */
  1281. double lnd;
  1282. try {
  1283. lnd = MATdeterminant_fromSymmetricMatrix (pool -> data.get());
  1284. } catch (MelderError) {
  1285. Melder_throw (U"Pooled covariance matrix is singular.");
  1286. }
  1287. double nsi = 0.0, m = ns * lnd; // First part of eq (3) page 297
  1288. for (integer i = 1; i <= numberOfMatrices; i ++) {
  1289. Covariance ci = my at [i];
  1290. try {
  1291. lnd = MATdeterminant_fromSymmetricMatrix (ci -> data.get());
  1292. } catch (MelderError) {
  1293. Melder_throw (U"Covariance matrix ", i, U" is singular.");
  1294. }
  1295. nsi += 1.0 / (ci -> numberOfObservations - 1);
  1296. m -= (ci -> numberOfObservations - 1) * lnd; // Last part of eq (3) page 297
  1297. }
  1298. /* Eq (4) page 297 */
  1299. double c1 = 1.0 - (2.0 * p * p + 3.0 * p - 1.0) / (6.0 * (p + 1) * (numberOfMatrices - 1)) * (nsi - 1.0 / ns);
  1300. df = (numberOfMatrices - 1.0) * p * (p + 1) / 2.0;
  1301. chisq = m * c1;
  1302. } else if (method == 2) { // Schott (2001) Wald 1
  1303. // T1 = sum(i=1..k, n [i]/n *tr((S [i]*S^-1)^2)- sum(i=1..k, sum(j=1..k, (n [i]/n)*(n [j]/n) *tr(S [i]*S^-1*S [j]*sS^-1))) =
  1304. // sum(i=1..k, (ni/n -(ni/n)^2) tr((si*s^-1)^2)
  1305. // - 2 * sum (i=1..k, sum(j=1..i-1, (ni/n)*(nj/n) *tr(si*s^-1*sj*s^-1)))
  1306. double trace = 0.0;
  1307. MATlowerCholeskyInverse_inplace (pool -> data.get(), nullptr);
  1308. autoMAT si = MATinverse_fromLowerCholeskyInverse (pool -> data.get());
  1309. for (integer i = 1; i <= numberOfMatrices; i ++) {
  1310. Covariance ci = my at [i];
  1311. double ni = ci -> numberOfObservations - 1;
  1312. autoMAT s1 = MATmul (ci -> data.get(), si.get());
  1313. double trace_ii = NUMtrace2_nn (s1.get(), s1.get());
  1314. trace += (ni / ns) * (1 - (ni / ns)) * trace_ii;
  1315. for (integer j = i + 1; j <= numberOfMatrices; j ++) {
  1316. Covariance cj = my at [j];
  1317. double nj = cj -> numberOfObservations - 1;
  1318. autoMAT s2 = MATmul (cj -> data.get(), si.get());
  1319. double trace_ij = NUMtrace2_nn (s1.get(), s2.get());
  1320. trace -= 2.0 * (ni / ns) * (nj / ns) * trace_ij;
  1321. }
  1322. }
  1323. df = (numberOfMatrices - 1) * p * (p + 1) / 2.0;
  1324. chisq = (ns / 2.0) * trace;
  1325. } else {
  1326. return;
  1327. }
  1328. if (p_prob) {
  1329. *p_prob = NUMchiSquareQ (chisq, df);
  1330. }
  1331. if (p_df) {
  1332. *p_df = df;
  1333. }
  1334. if (p_chisq) {
  1335. *p_chisq = chisq;
  1336. }
  1337. } catch (MelderError) {
  1338. Melder_throw (U"Equality coud not be tested.");
  1339. }
  1340. }
  1341. void Covariance_difference (Covariance me, Covariance thee, double *p_prob, double *p_chisq, double *p_df) {
  1342. integer p = my numberOfRows;
  1343. integer numberOfObservations = Melder_ifloor (my numberOfObservations);
  1344. double ln_me, ln_thee;
  1345. double chisq = undefined, df = undefined;
  1346. Melder_require (my numberOfRows == thy numberOfRows, U"Matrices should have equal dimensions.");
  1347. if (my numberOfObservations != thy numberOfObservations) {
  1348. numberOfObservations = Melder_ifloor (my numberOfObservations > thy numberOfObservations ?
  1349. thy numberOfObservations : my numberOfObservations) - 1;
  1350. Melder_warning (U"Covariance_difference: number of observations of matrices do not agree.\n"
  1351. U" The minimum size (", numberOfObservations, U") of the two is used.");
  1352. }
  1353. Melder_require (numberOfObservations > 1,
  1354. U"Number of observations too small.");
  1355. Melder_assert (thy data.ncol == p);
  1356. autoMAT linv = matrixcopy (thy data.get());
  1357. MATlowerCholeskyInverse_inplace (linv.get(), & ln_thee);
  1358. ln_me = MATdeterminant_fromSymmetricMatrix (my data.get());
  1359. /*
  1360. We need trace (A B^-1). We have A and the inverse L^(-1) of the
  1361. cholesky decomposition L^T L of B in the lower triangle + diagonal.
  1362. Always: tr (A B) = tr (B A)
  1363. tr (A B^-1) = tr (A (L L^T)^-1) = tr (A L^-1 (L^T)^-1)
  1364. trace = sum(i=1..p, j=1..p, l=max(i,j)..p, A [i] [j]Lm [l] [j]Lm [l] [i],
  1365. where Lm = L^(-1)
  1366. */
  1367. double trace = 0.0;
  1368. for (integer i = 1; i <= p; i ++) {
  1369. for (integer j = 1; j <= p; j ++) {
  1370. integer lp = std::max (j, i);
  1371. for (integer l = lp; l <= p; l ++) {
  1372. trace += my data [i] [j] * linv [l] [j] * linv [l] [i];
  1373. }
  1374. }
  1375. }
  1376. double l = (numberOfObservations - 1) * fabs (ln_thee - ln_me + trace - p);
  1377. chisq = l * fabs (1.0 - (2.0 * p + 1.0 - 2.0 / (p + 1)) / (numberOfObservations - 1) / 6.0);
  1378. df = p * (p + 1) / 2.0;
  1379. if (p_prob) {
  1380. *p_prob = NUMchiSquareQ (chisq, df);
  1381. }
  1382. if (p_chisq) {
  1383. *p_chisq = chisq;
  1384. }
  1385. if (p_df) {
  1386. *p_df = df;
  1387. }
  1388. }
  1389. static void checkOneIndex (TableOfReal me, integer index) {
  1390. Melder_require (index > 0 && index <= my numberOfColumns, U"Index should be in interval [1, ", my numberOfColumns, U"].");
  1391. }
  1392. static void checkTwoIndices (TableOfReal me, integer index1, integer index2) {
  1393. Melder_require (index1 > 0 && index1 <= my numberOfColumns && index2 > 0 && index2 <= my numberOfColumns,
  1394. U"Index should be in interval [1, ", my numberOfColumns, U"].");
  1395. Melder_require (index1 != index2,
  1396. U"Indices should be different.");
  1397. }
  1398. void Covariance_getSignificanceOfOneMean (Covariance me, integer index, double mu, double *p_prob, double *p_t, double *p_df) {
  1399. double var = my data [index] [index];
  1400. double prob = undefined, t = undefined, df = my numberOfObservations - 1.0;
  1401. checkOneIndex (me, index);
  1402. if (var > 0.0) {
  1403. t = (my centroid [index] - mu) / sqrt (var / my numberOfObservations);
  1404. if (p_prob) {
  1405. prob = 2.0 * NUMstudentQ (fabs (t), df);
  1406. }
  1407. }
  1408. if (p_prob) {
  1409. *p_prob = prob;
  1410. }
  1411. if (p_t) {
  1412. *p_t = t;
  1413. }
  1414. if (p_df) {
  1415. *p_df = df;
  1416. }
  1417. }
  1418. void Covariance_getSignificanceOfMeansDifference (Covariance me, integer index1, integer index2, double mu, int paired, int equalVariances, double *p_prob, double *p_t, double *p_df) {
  1419. integer n = Melder_ifloor (my numberOfObservations);
  1420. double prob = undefined, t = undefined;
  1421. double df = 2.0 * (n - 1);
  1422. checkTwoIndices (me, index1, index2);
  1423. double var1 = my data [index1] [index1];
  1424. double var2 = my data [index2] [index2];
  1425. double var_pooled = var1 + var2;
  1426. if (var_pooled == 0.0) {
  1427. Melder_warning (U"The pooled variance turned out to be zero. Check your data.");
  1428. goto end;
  1429. }
  1430. if (paired) {
  1431. var_pooled -= 2.0 * my data [index1] [index2];
  1432. df /= 2.0;
  1433. }
  1434. if (var_pooled == 0.0) {
  1435. Melder_warning (U"The pooled variance with the paired correction turned out to be zero. ");
  1436. prob = 0.0;
  1437. goto end;
  1438. }
  1439. t = (my centroid [index1] - my centroid [index2] - mu) / sqrt (var_pooled / n);
  1440. /*
  1441. Return two sided probabilty.
  1442. */
  1443. if (equalVariances) {
  1444. prob = 2.0 * NUMstudentQ (fabs (t), df);
  1445. } else {
  1446. df = (1.0 + 2.0 * var1 * var2 / (var1 * var1 + var2 * var2)) * (n - 1);
  1447. prob = NUMincompleteBeta (df / 2.0, 0.5, df / (df + t * t));
  1448. }
  1449. end:
  1450. if (p_prob) {
  1451. *p_prob = prob;
  1452. }
  1453. if (p_t) {
  1454. *p_t = t;
  1455. }
  1456. if (p_df) {
  1457. *p_df = df;
  1458. }
  1459. }
  1460. void Covariance_getSignificanceOfOneVariance (Covariance me, integer index, double sigmasq, double *p_prob, double *p_chisq, double *p_df) {
  1461. double var = my data [index] [index];
  1462. double prob = undefined, chisq = undefined;
  1463. double df = my numberOfObservations - 1.0;
  1464. checkOneIndex (me, index);
  1465. if (var > 0.0) {
  1466. chisq = df;
  1467. if (sigmasq > 0.0) {
  1468. chisq = df * var / sigmasq;
  1469. }
  1470. if (p_prob) {
  1471. prob = NUMchiSquareQ (chisq, df);
  1472. }
  1473. }
  1474. if (p_prob) {
  1475. *p_prob = prob;
  1476. }
  1477. if (p_chisq) {
  1478. *p_chisq = chisq;
  1479. }
  1480. if (p_df) {
  1481. *p_df = df;
  1482. }
  1483. }
  1484. void Covariance_getSignificanceOfVariancesRatio (Covariance me, integer index1, integer index2, double ratio, double *p_prob, double *p_f, double *p_df) {
  1485. double df = my numberOfObservations - 1.0, prob = undefined, f = undefined;
  1486. checkTwoIndices (me, index1, index2);
  1487. double var1 = my data [index1] [index1];
  1488. double var2 = my data [index2] [index2];
  1489. if (var1 > 0.0 && var2 > 0.0) {
  1490. double ratio2 = (var1 / var2) / ratio;
  1491. f = ratio2;
  1492. if (var2 > var1) {
  1493. ratio2 = (var2 / var1) * ratio;
  1494. }
  1495. if (p_prob) {
  1496. prob = 2.0 * NUMfisherQ (ratio2, df, df);
  1497. if (prob > 1.0) {
  1498. prob = 2.0 - prob;
  1499. }
  1500. }
  1501. }
  1502. if (p_prob) {
  1503. *p_prob = prob;
  1504. }
  1505. if (p_df) {
  1506. *p_df = df;
  1507. }
  1508. if (p_f) {
  1509. *p_f = f;
  1510. }
  1511. }
  1512. autoTableOfReal Correlation_confidenceIntervals (Correlation me, double confidenceLevel, integer numberOfTests, int method) {
  1513. try {
  1514. integer m_bonferroni = my numberOfRows * (my numberOfRows - 1) / 2;
  1515. Melder_require (confidenceLevel > 0 && confidenceLevel <= 1.0, U"Confidence level should be in interval (0-1).");
  1516. Melder_require (my numberOfObservations > 4, U"The number of observations should be greater than 4.");
  1517. Melder_require (numberOfTests >= 0, U"The \"number of tests\" should not be less than zero.");
  1518. if (numberOfTests == 0)
  1519. numberOfTests = m_bonferroni;
  1520. if (numberOfTests > m_bonferroni)
  1521. Melder_warning (U"The \"number of tests\" should not exceed the number of elements in the Correlation object.");
  1522. autoTableOfReal thee = TableOfReal_create (my numberOfRows, my numberOfRows);
  1523. TableOfReal_copyLabels (me, thee.get(), 1, 1);
  1524. // Obtain large-sample conservative multiple tests and intervals by the
  1525. // Bonferroni inequality and the Fisher z transformation.
  1526. // Put upper value of confidence intervals in upper part and lower
  1527. // values of confidence intervals in lower part of resulting table.
  1528. double z = NUMinvGaussQ ( (1 - confidenceLevel) / (2.0 * numberOfTests));
  1529. double zf = z / sqrt (my numberOfObservations - 3.0);
  1530. double two_n = 2.0 * my numberOfObservations;
  1531. for (integer i = 1; i <= my numberOfRows; i ++) {
  1532. for (integer j = i + 1; j <= my numberOfRows; j ++) {
  1533. double rij = my data [i] [j], rmin, rmax;
  1534. if (method == 2) {
  1535. // Fisher's approximation
  1536. double zij = 0.5 * log ( (1 + rij) / (1 - rij));
  1537. rmax = tanh (zij + zf);
  1538. rmin = tanh (zij - zf);
  1539. } else if (method == 1) {
  1540. // Ruben's approximation
  1541. double rs = rij / sqrt (1.0 - rij * rij);
  1542. double a = two_n - 3.0 - z * z;
  1543. double b = rs * sqrt ( (two_n - 3.0) * (two_n - 5.0));
  1544. double c = (a - 2.0) * rs * rs - 2.0 * z * z;
  1545. // Solve: a y^2 - 2b y + c = 0
  1546. // q = -0.5((-2b) + sgn(-2b) sqrt((-2b)^2 - 4ac))
  1547. // y1 = q/a; y2 = c/q;
  1548. double q, d = sqrt (b * b - a * c);
  1549. if (b > 0) {
  1550. d = - d;
  1551. }
  1552. q = b - d;
  1553. rmin = q / a;
  1554. rmin /= sqrt (1.0 + rmin * rmin);
  1555. rmax = c / q;
  1556. rmax /= sqrt (1.0 + rmax * rmax);
  1557. if (rmin > rmax) {
  1558. double t = rmin;
  1559. rmin = rmax; rmax = t;
  1560. }
  1561. } else {
  1562. rmax = rmin = 0;
  1563. }
  1564. thy data [i] [j] = rmax;
  1565. thy data [j] [i] = rmin;
  1566. }
  1567. thy data [i] [i] = 1;
  1568. }
  1569. return thee;
  1570. } catch (MelderError) {
  1571. Melder_throw (me, U": confidence intervals not created.");
  1572. }
  1573. }
  1574. void SSCP_getDiagonality_bartlett (SSCP me, integer numberOfContraints, double *chisq, double *prob, double *df) {
  1575. autoCorrelation c = SSCP_to_Correlation (me);
  1576. Correlation_testDiagonality_bartlett (c.get(), numberOfContraints, chisq, prob, df);
  1577. }
  1578. /* Morrison, page 118 */
  1579. void Correlation_testDiagonality_bartlett (Correlation me, integer numberOfContraints, double *p_chisq, double *p_prob, double *p_df) {
  1580. integer p = my numberOfRows;
  1581. double chisq = undefined, prob = undefined, df = p * (p -1) / 2.0;
  1582. if (numberOfContraints <= 0)
  1583. numberOfContraints = 1;
  1584. if (numberOfContraints > my numberOfObservations) {
  1585. Melder_warning (U"Correlation_testDiagonality_bartlett: number of constraints cannot exceed the number of observations.");
  1586. return;
  1587. }
  1588. if (my numberOfObservations >= numberOfContraints) {
  1589. double ln_determinant = MATdeterminant_fromSymmetricMatrix (my data.get());
  1590. chisq = - ln_determinant * (my numberOfObservations - numberOfContraints - (2.0 * p + 5.0) / 6.0);
  1591. if (p_prob) {
  1592. prob = NUMchiSquareQ (chisq, df);
  1593. }
  1594. }
  1595. if (p_chisq) {
  1596. *p_chisq = chisq;
  1597. }
  1598. if (p_prob) {
  1599. *p_prob = prob;
  1600. }
  1601. if (p_df) {
  1602. *p_df = df;
  1603. }
  1604. }
  1605. void SSCP_expand (SSCP me) {
  1606. // A reduced matrix has my numberOfRows < my numberOfColumns.
  1607. // After expansion:
  1608. // my numberOfRows == my numberOfColumns
  1609. // my storageNumberOfRows = my numberOfRows (before)
  1610. // my data (after) = my expansion;
  1611. // my expansion = my data (before)
  1612. // No expansion for a standard matrix or if already expanded and data has not changed!
  1613. if ((my expansionNumberOfRows == 0 && my numberOfRows == my numberOfColumns) ||
  1614. (my expansionNumberOfRows > 0 && ! my dataChanged))
  1615. return;
  1616. if (NUMisEmpty (my expansion.get()))
  1617. my expansion = MATzero (my numberOfColumns, my numberOfColumns);
  1618. for (integer ir = 1; ir <= my numberOfColumns; ir ++)
  1619. for (integer ic = ir; ic <= my numberOfColumns; ic ++) {
  1620. integer dij = labs (ir - ic);
  1621. my expansion [ir] [ic] = my expansion [ic] [ir] = dij < my numberOfRows ? my data [dij + 1] [ic] : 0.0;
  1622. }
  1623. // Now make 'my data' point to 'my expansion'
  1624. std::swap (my data, my expansion);
  1625. my expansionNumberOfRows = my numberOfRows;
  1626. my numberOfRows = my numberOfColumns;
  1627. my dataChanged = 0;
  1628. }
  1629. void SSCP_unExpand (SSCP me) {
  1630. if (my expansionNumberOfRows == 0)
  1631. return;
  1632. my data = my expansion.move();
  1633. my numberOfRows = my expansionNumberOfRows;
  1634. my expansionNumberOfRows = 0;
  1635. my dataChanged = 0;
  1636. }
  1637. void SSCP_expandLowerCholesky (SSCP me) {
  1638. if (NUMisEmpty (my lowerCholesky.get()))
  1639. my lowerCholesky = MATzero (my numberOfRows, my numberOfColumns);
  1640. if (my numberOfRows == 1) { // diagonal
  1641. my lnd = 0.0;
  1642. for (integer j = 1; j <= my numberOfColumns; j ++) {
  1643. my lowerCholesky [1] [j] = 1.0 / sqrt (my data [1] [j]); // inverse is 1/stddev
  1644. my lnd += log (my data [1] [j]); // diagonal elmnt is variance
  1645. }
  1646. } else {
  1647. for (integer i = 1; i <= my numberOfRows; i ++)
  1648. for (integer j = i; j <= my numberOfColumns; j ++)
  1649. my lowerCholesky [j] [i] = my lowerCholesky [i] [j] = my data [i] [j];
  1650. try {
  1651. MATlowerCholeskyInverse_inplace (my lowerCholesky.get(), & (my lnd));
  1652. } catch (MelderError) {
  1653. // singular matrix: arrange a diagonal only inverse.
  1654. my lnd = 0.0;
  1655. for (integer i = 1; i <= my numberOfRows; i ++) {
  1656. for (integer j = i; j <= my numberOfColumns; j ++)
  1657. my lowerCholesky [i] [j] = my lowerCholesky [j] [i] = ( i == j ? 1.0 / sqrt (my data [i] [i]) : 0.0 );
  1658. my lnd += log (my data [i] [i]);
  1659. }
  1660. my lnd *= 2.0;
  1661. }
  1662. }
  1663. }
  1664. void SSCP_unExpandLowerCholesky (SSCP me) {
  1665. my lowerCholesky.reset();
  1666. my lnd = 0.0;
  1667. }
  1668. void SSCP_expandPCA (SSCP me) {
  1669. if (my pca)
  1670. my pca.reset();
  1671. my pca = SSCP_to_PCA (me);
  1672. }
  1673. void SSCP_unExpandPCA (SSCP me) {
  1674. my pca.reset();
  1675. }
  1676. /* End of file SSCP.c */