1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408 |
- /* GaussianMixture.cpp
- *
- * Copyright (C) 2011-2018 David Weenink
- *
- * This code is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This code is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this work. If not, see <http://www.gnu.org/licenses/>.
- */
- /*
- djmw 20101021 Initial version
- */
- #include "Distributions_and_Strings.h"
- #include "GaussianMixture.h"
- #include "NUMmachar.h"
- #include "NUM2.h"
- #include "Strings_extensions.h"
- #include "oo_DESTROY.h"
- #include "GaussianMixture_def.h"
- #include "oo_COPY.h"
- #include "GaussianMixture_def.h"
- #include "oo_EQUAL.h"
- #include "GaussianMixture_def.h"
- #include "oo_CAN_WRITE_AS_ENCODING.h"
- #include "GaussianMixture_def.h"
- #include "oo_WRITE_TEXT.h"
- #include "GaussianMixture_def.h"
- #include "oo_READ_TEXT.h"
- #include "GaussianMixture_def.h"
- #include "oo_WRITE_BINARY.h"
- #include "GaussianMixture_def.h"
- #include "oo_READ_BINARY.h"
- #include "GaussianMixture_def.h"
- #include "oo_DESCRIPTION.h"
- #include "GaussianMixture_def.h"
- Thing_implement (GaussianMixture, Daata, 0);
- conststring32 GaussianMixture_criterionText (int criterion) {
- conststring32 criterionText [6] = { U"(1/n)*LLH", U"(1/n)*MML", U"(1/n)*BIC", U"(1/n)*AIC", U"(1/n)*AICc", U"(1/n)*CD_LLH" };
- return criterion >= 0 && criterion < 7 ? criterionText [criterion] : U"(1/n)*ln(p)";
- }
- void GaussianMixture_removeComponent (GaussianMixture me, integer component);
- void GaussianMixture_removeComponent_bookkeeping (GaussianMixture me, integer component, double **p, integer numberOfRows);
- int GaussianMixture_TableOfReal_getProbabilities (GaussianMixture me, TableOfReal thee, integer component, double **p);
- void GaussianMixture_TableOfReal_getGammas (GaussianMixture me, TableOfReal thee, double **gamma, double *lnp);
- double GaussianMixture_getLikelihoodValue (GaussianMixture me, double **p, integer numberOfRows, int onlyLikelyhood);
- void GaussianMixture_updateProbabilityMarginals (GaussianMixture me, double **p, integer numberOfRows);
- integer GaussianMixture_getNumberOfParametersInComponent (GaussianMixture me);
- static void NUMdvector_scaleAsProbabilities (double *v, integer n) {
- longdouble sum = 0.0;
- for (integer i = 1; i <= n; i ++) {
- sum += v [i];
- }
- if (sum > 0.0) {
- for (integer i = 1; i <= n; i ++) {
- v [i] /= sum;
- }
- }
- }
- static void GaussianMixture_updateCovariance (GaussianMixture me, integer component, double **data, integer numberOfRows, double **p) {
- if (component < 1 || component > my numberOfComponents) {
- return;
- }
- Covariance thee = my covariances->at [component];
- double mixprob = my mixingProbabilities [component];
- double gsum = p [numberOfRows + 1] [component];
- // update the means
- for (integer j = 1; j <= thy numberOfColumns; j ++) {
- thy centroid [j] = 0.0;
- for (integer i = 1; i <= numberOfRows; i ++) {
- double gamma = mixprob * p [i] [component] / p [i] [my numberOfComponents + 1];
- thy centroid [j] += gamma * data [i] [j] ; // eq. Bishop 9.17
- }
- thy centroid [j] /= gsum;
- }
- // update covariance with the new mean
- if (thy numberOfRows == 1) { // 1xn covariance
- for (integer j = 1; j <= thy numberOfColumns; j ++) {
- thy data [1] [j] = 0.0;
- }
- for (integer i = 1; i <= numberOfRows; i ++) {
- double gamma = mixprob * p [i] [component] / p [i] [my numberOfComponents + 1];
- double gdn = gamma / gsum;
- for (integer j = 1; j <= thy numberOfColumns; j ++) {
- double xj = thy centroid [j] - data [i] [j];
- thy data [1] [j] += gdn * xj * xj;
- }
- }
- } else { // nxn covariance
- for (integer j = 1; j <= thy numberOfRows; j ++)
- for (integer k = j; k <= thy numberOfColumns; k ++) {
- thy data [k] [j] = thy data [j] [k] = 0;
- }
- for (integer i = 1; i <= numberOfRows; i ++) {
- double gamma = mixprob * p [i] [component] / p [i] [my numberOfComponents + 1];
- double gdn = gamma / gsum; // we cannot divide by nk - 1, this could cause instability
- for (integer j = 1; j <= thy numberOfColumns; j ++) {
- double xj = thy centroid [j] - data [i] [j];
- for (integer k = j; k <= thy numberOfColumns; k ++) {
- thy data [j] [k] = thy data [k] [j] += gdn * xj * (thy centroid [k] - data [i] [k]);
- }
- }
- }
- }
- thy numberOfObservations = my mixingProbabilities [component] * numberOfRows;
- }
- static void GaussianMixture_addCovarianceFraction (GaussianMixture me, integer im, Covariance him, double fraction) {
- if (im < 1 || im > my numberOfComponents || fraction == 0) {
- return;
- }
- Covariance thee = my covariances->at [im];
- // prevent instability: add lambda fraction of global covariances
- if (thy numberOfRows == 1) {
- for (integer j = 1; j <= thy numberOfColumns; j ++) {
- thy data [1] [j] += fraction * his data [j] [j];
- }
- } else {
- for (integer j = 1; j <= thy numberOfColumns; j++) {
- for (integer k = j; k <= thy numberOfColumns; k++) {
- thy data [k] [j] = thy data [j] [k] += fraction * his data [j] [k];
- }
- }
- }
- }
- void structGaussianMixture :: v_info () {
- our structDaata :: v_info ();
- MelderInfo_writeLine (U"Number of components: ", our numberOfComponents);
- MelderInfo_writeLine (U"Dimension of component: ", our dimension);
- MelderInfo_writeLine (U"Mixing probabilities:");
- for (integer im = 1; im <= numberOfComponents; im ++) {
- MelderInfo_writeLine (U" ", im, U": p = ", our mixingProbabilities [im], U" Name = \"", Thing_getName (our covariances->at [im]), U"\"");
- }
- }
- static void GaussianMixture_setLabelsFromTableOfReal (GaussianMixture me, TableOfReal thee) {
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- for (integer j = 1; j <= my dimension; j ++) {
- TableOfReal_setColumnLabel (cov, j, thy columnLabels [j].get());
- }
- }
- }
- // only from big to reduced or same
- static void Covariance_into_Covariance (Covariance me, Covariance thee) {
- Melder_require (my numberOfColumns == thy numberOfColumns, U"Dimensions should be equal.");
-
- SSCP_unExpand (thee); // to its original state
- thy numberOfObservations = my numberOfObservations;
- // copy centroid & column labels
- for (integer ic = 1; ic <= my numberOfColumns; ic ++) {
- thy centroid [ic] = my centroid [ic];
- }
- thy columnLabels. copyElementsFrom (my columnLabels.get());
- // Are the matrix sizes equal
- if (my numberOfRows == thy numberOfRows) {
- thy rowLabels. copyElementsFrom (my rowLabels.get());
- matrixcopy_preallocated (thy data.get(), my data.get());
- return;
- } else {
- for (integer ir = 1; ir <= my numberOfRows; ir ++) {
- for (integer ic = ir; ic <= my numberOfColumns; ic ++) {
- integer dij = ic - ir;
- if (dij < thy numberOfRows)
- thy data [dij + 1] [ic] = my data [ir] [ic];
- }
- }
- }
- }
- static void GaussianMixture_setDefaultMixtureNames (GaussianMixture me) {
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- Thing_setName (cov, Melder_cat (U"m", im));
- }
- }
- autoGaussianMixture GaussianMixture_create (integer numberOfComponents, integer dimension, integer storage) {
- try {
- autoGaussianMixture me = Thing_new (GaussianMixture);
- my numberOfComponents = numberOfComponents;
- my dimension = dimension;
- my mixingProbabilities = VECzero (numberOfComponents);
- my covariances = CovarianceList_create ();
- for (integer im = 1; im <= numberOfComponents; im ++) {
- autoCovariance cov = Covariance_create_reduceStorage (dimension, storage);
- my covariances -> addItemAtPosition_move (cov.move(), im);
- }
- for (integer im = 1; im <= numberOfComponents; im ++) {
- my mixingProbabilities [im] = 1.0 / numberOfComponents;
- }
- GaussianMixture_setDefaultMixtureNames (me.get());
- return me;
- } catch (MelderError) {
- Melder_throw (U"GaussianMixture not created.");
- }
- }
- /* c is double vector 1..dimension !!!!!! */
- int GaussianMixture_generateOneVector_inline (GaussianMixture me, VEC c, char32 **covname, VEC buf) {
- try {
- double p = NUMrandomUniform (0.0, 1.0);
- integer im = NUMgetIndexFromProbability (my mixingProbabilities.at, my numberOfComponents, p);
- Covariance thee = my covariances->at [im];
- *covname = thy name.get(); // BUG dangle
- if (thy numberOfRows == 1) { // 1xn reduced form
- for (integer i = 1; i <= my dimension; i ++) {
- c [i] = NUMrandomGauss (thy centroid [i], sqrt (thy data [1] [i]));
- }
- } else { // nxn
- if (! thy pca) {
- SSCP_expandPCA (thee); // on demand expanding
- }
- Covariance_PCA_generateOneVector_inline (thee, thy pca.get(), c, buf);
- }
- return 1;
- } catch (MelderError) {
- Melder_throw (me, U": vector not generated.");
- }
- }
- autoGaussianMixture TableOfReal_to_GaussianMixture_fromRowLabels (TableOfReal me, integer storage) {
- try {
- autoStrings rowLabels = TableOfReal_extractRowLabels (me);
- autoDistributions dist = Strings_to_Distributions (rowLabels.get());
- integer numberOfComponents = dist -> numberOfRows;
- autoGaussianMixture thee = GaussianMixture_create (numberOfComponents, my numberOfColumns, storage);
- GaussianMixture_setLabelsFromTableOfReal (thee.get(), me);
- for (integer i = 1; i <= numberOfComponents; i ++) {
- autoTableOfReal tab = TableOfReal_extractRowsWhereLabel (me, kMelder_string::EQUAL_TO, dist -> rowLabels [i].get());
- autoCovariance cov = TableOfReal_to_Covariance (tab.get());
- Covariance_into_Covariance (cov.get(), thy covariances->at [i]);
- Thing_setName (thy covariances->at [i], dist -> rowLabels [i].get());
- }
- for (integer im = 1; im <= numberOfComponents; im ++) {
- thy mixingProbabilities [im] = dist -> data [im] [1] / my numberOfRows;
- }
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": no GaussianMixture created.");
- }
- }
- autoCovariance GaussianMixture_to_Covariance_between (GaussianMixture me) {
- try {
- autoCovariance thee = Covariance_create (my dimension);
- // First the new centroid, based on the mixture centroids
- double nobs_total = 0;
- for (integer i = 1; i <= my numberOfComponents; i ++) {
- Covariance him = my covariances->at [i];
- double nobs = his numberOfObservations; // the weighting factor
- for (integer ic = 1; ic <= my dimension; ic ++) {
- thy centroid [ic] += nobs * his centroid [ic];
- }
- nobs_total += nobs;
- }
- for (integer ic = 1; ic <= my dimension; ic ++) {
- thy centroid [ic] /= nobs_total;
- }
- Covariance cov = my covariances->at [1];
- for (integer i = 1; i <= thy numberOfColumns; i ++) {
- if (cov -> columnLabels [i]) {
- TableOfReal_setColumnLabel (thee.get(), i, cov -> columnLabels [i].get());
- TableOfReal_setRowLabel (thee.get(), i, cov -> columnLabels [i].get()); // if diagonal !
- }
- }
- // Between covariance, scale by the number of observations
- for (integer i = 1; i <= my numberOfComponents; i ++) {
- Covariance him = my covariances->at [i];
- double nobs = his numberOfObservations - 1; // we lose 1 degree of freedom
- for (integer ir = 1; ir <= my dimension; ir ++) {
- double dir = his centroid [ir] - thy centroid [ir];
- for (integer ic = ir; ic <= my dimension; ic ++) {
- double dic = his centroid [ic] - thy centroid [ic];
- thy data [ir] [ic] = thy data [ic] [ir] += nobs * dir * dic;
- }
- }
- }
- // Scale back
- for (integer ir = 1; ir <= my dimension; ir ++) {
- for (integer ic = ir; ic <= my dimension; ic ++) {
- thy data [ir] [ic] = thy data [ic] [ir] /= nobs_total;
- }
- }
- thy numberOfObservations = nobs_total;
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": no Covariance (between) created.");
- }
- }
- autoCovariance GaussianMixture_to_Covariance_within (GaussianMixture me) {
- try {
- autoCovariance thee = Covariance_create (my dimension);
- for (integer i = 1; i <= my numberOfComponents; i ++) {
- double p = my mixingProbabilities [i];
- Covariance him = my covariances->at [i];
- if (his numberOfRows == 1) {
- for (integer ic = 1; ic <= my dimension; ic ++) {
- thy data [ic] [ic] += p * his data [1] [ic];
- }
- } else {
- for (integer ir = 1; ir <= my dimension; ir ++) {
- for (integer ic = ir; ic <= my dimension; ic ++) {
- thy data [ir] [ic] = thy data [ic] [ir] += p * his data [ir] [ic];
- }
- }
- }
- thy numberOfObservations += his numberOfObservations - 1; // we lose a degree of freedom?
- }
- // Leave centroid at 0 so we can add the within and between covariance nicely
- // Copy row labels from columns, because covar might be diagonal
- TableOfReal_copyLabels (my covariances->at [1], thee.get(), -1, 1);
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": no Covariance (within) created.");
- }
- }
- autoCovariance GaussianMixture_to_Covariance_total (GaussianMixture me) {
- try {
- autoCovariance thee = GaussianMixture_to_Covariance_between (me);
- autoCovariance within = GaussianMixture_to_Covariance_within (me);
- for (integer ir = 1; ir <= my dimension; ir++) {
- for (integer ic = ir; ic <= my dimension; ic++) {
- thy data [ir] [ic] = thy data [ic] [ir] += within -> data [ir] [ic];
- }
- }
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": no Covariance (total) created.");
- }
- }
- autoCovariance GaussianMixture_extractComponent (GaussianMixture me, integer component) {
- try {
- Melder_require (component > 0 && component <= my numberOfComponents, U"The component should be in [1, ", my numberOfComponents, U".");
- autoCovariance thee = Data_copy (my covariances->at [component]);
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": no component extracted.");
- }
- }
- autoTableOfReal GaussianMixture_extractMixingProbabilities (GaussianMixture me) {
- try {
- autoTableOfReal thee = TableOfReal_create (my numberOfComponents, 2);
- TableOfReal_setColumnLabel (thee.get(), 1, U"p");
- TableOfReal_setColumnLabel (thee.get(), 2, U"n");
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- thy data [im] [1] = my mixingProbabilities [im];
- thy data [im] [2] = cov -> numberOfObservations;
- TableOfReal_setRowLabel (thee.get(), im, Thing_getName (cov));
- }
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": no mixing probabilities extracted.");
- }
- }
- autoTableOfReal GaussianMixture_extractCentroids (GaussianMixture me) {
- try {
- autoTableOfReal thee = TableOfReal_create (my numberOfComponents, my dimension);
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- if (im == 1) {
- for (integer j = 1; j <= my dimension; j ++) {
- TableOfReal_setColumnLabel (thee.get(), j, cov -> columnLabels [j].get());
- }
- }
- TableOfReal_setRowLabel (thee.get(), im, Thing_getName (cov));
- for (integer j = 1; j <= my dimension; j ++) {
- thy data [im] [j] = cov -> centroid [j];
- }
- }
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": no centroid extracted.");
- }
- }
- autoPCA GaussianMixture_to_PCA (GaussianMixture me) {
- try {
- autoCovariance him = GaussianMixture_to_Covariance_total (me);
- autoPCA thee = SSCP_to_PCA (him.get());
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": no PCA calculated.");
- }
- }
- void GaussianMixture_getIntervalsAlongDirections (GaussianMixture me, integer d1, integer d2, double nsigmas, double *xmin, double *xmax, double *ymin, double *ymax) {
- *xmin = *xmax = *ymin = *ymax = undefined;
- Melder_require (d1 > 0 && d1 <= my dimension && d2 > 0 && d2 <= my dimension, U"Incorrect directions.");
-
- autoSSCPList sscps = SSCPList_extractTwoDimensions (my covariances->asSSCPList(), d1, d2);
- SSCPList_getEllipsesBoundingBoxCoordinates (sscps.get(), -nsigmas, 0, xmin, xmax, ymin, ymax);
- }
- void GaussianMixture_getIntervalAlongDirection (GaussianMixture me, integer d, double nsigmas, double *xmin, double *xmax) {
- double ymin, ymax;
- GaussianMixture_getIntervalsAlongDirections (me, d, d, nsigmas, xmin, xmax, &ymin, &ymax);
- }
- void GaussianMixture_PCA_getIntervalsAlongDirections (GaussianMixture me, PCA thee, integer d1, integer d2, double nsigmas, double *xmin, double *xmax, double *ymin, double *ymax) {
- Melder_require (my dimension == thy dimension,
- U"Dimensions should be equal.");
- Melder_require (d1 > 0 && d1 <= my dimension && d2 > 0 && d2 <= my dimension,
- U"Incorrect directions.");
-
- autoSSCPList sscps = SSCPList_toTwoDimensions (my covariances->asSSCPList(), thy eigenvectors.row (d1), thy eigenvectors.row (d2));
- SSCPList_getEllipsesBoundingBoxCoordinates (sscps.get(), -nsigmas, 0, xmin, xmax, ymin, ymax);
- }
- void GaussianMixture_PCA_getIntervalAlongDirection (GaussianMixture me, PCA thee, integer d, double nsigmas, double *xmin, double *xmax) {
- GaussianMixture_PCA_getIntervalsAlongDirections (me, thee, d, d, nsigmas, xmin, xmax, nullptr, nullptr);
- }
- void GaussianMixture_PCA_drawMarginalPdf (GaussianMixture me, PCA thee, Graphics g, integer d, double xmin, double xmax, double ymin, double ymax, integer npoints, integer nbins, int garnish) {
- if (my dimension != thy dimension || d < 1 || d > my dimension) {
- Melder_warning (U"Dimensions don't agree.");
- return;
- }
- if (npoints <= 1)
- npoints = 1000;
- double nsigmas = 2;
- if (xmax <= xmin)
- GaussianMixture_PCA_getIntervalAlongDirection (me, thee, d, nsigmas, & xmin, & xmax);
- double pmax = 0.0, dx = (xmax - xmin) / npoints, x1 = xmin + 0.5 * dx;
- double scalef = ( nbins <= 0 ? 1.0 : 1.0 ); // TODO
- autoVEC p = VECraw (npoints);
- for (integer i = 1; i <= npoints; i ++) {
- double x = x1 + (i - 1) * dx;
- Melder_assert (thy eigenvectors.ncol == thy dimension);
- p [i] = scalef * GaussianMixture_getMarginalProbabilityAtPosition (me, thy eigenvectors.row (d), x);
- if (p [i] > pmax)
- pmax = p [i];
- }
- if (ymin >= ymax) {
- ymin = 0.0;
- ymax = pmax;
- }
- Graphics_setInner (g);
- Graphics_setWindow (g, xmin, xmax, ymin, ymax);
- Graphics_function (g, p.at, 1, npoints, x1, xmax - 0.5 * dx);
- Graphics_unsetInner (g);
- if (garnish) {
- Graphics_drawInnerBox (g);
- Graphics_markBottom (g, xmin, true, true, false, nullptr);
- Graphics_markBottom (g, xmax, true, true, false, nullptr);
- Graphics_markLeft (g, ymin, true, true, false, nullptr);
- Graphics_markLeft (g, ymax, true, true, false, nullptr);
- }
- }
- void GaussianMixture_drawMarginalPdf (GaussianMixture me, Graphics g, integer d, double xmin, double xmax, double ymin, double ymax, integer npoints, integer nbins, int garnish) {
- if (d < 1 || d > my dimension) {
- Melder_warning (U"Dimension doesn't agree.");
- return;
- }
- if (npoints <= 1) {
- npoints = 1000;
- }
- autoVEC p (npoints, kTensorInitializationType::RAW);
- autoVEC v (my dimension, kTensorInitializationType::RAW);
- double nsigmas = 2.0;
- if (xmax <= xmin) {
- GaussianMixture_getIntervalAlongDirection (me, d, nsigmas, &xmin, &xmax);
- }
- double pmax = 0.0, dx = (xmax - xmin) / (npoints - 1);
- double scalef = ( nbins <= 0 ? 1.0 : 1.0 ); // TODO
-
- for (integer i = 1; i <= npoints; i++) {
- double x = xmin + (i - 1) * dx;
- for (integer k = 1; k <= my dimension; k++) {
- v [k] = k == d ? 1 : 0;
- }
- p [i] = scalef * GaussianMixture_getMarginalProbabilityAtPosition (me, v.get(), x);
- if (p [i] > pmax) {
- pmax = p [i];
- }
- }
- if (ymin >= ymax) {
- ymin = 0;
- ymax = pmax;
- }
- Graphics_setInner (g);
- Graphics_setWindow (g, xmin, xmax, ymin, ymax);
- Graphics_function (g, p.at, 1, npoints, xmin, xmax);
- Graphics_unsetInner (g);
- if (garnish) {
- Graphics_drawInnerBox (g);
- Graphics_markBottom (g, xmin, true, true, false, nullptr);
- Graphics_markBottom (g, xmax, true, true, false, nullptr);
- Graphics_markLeft (g, ymin, true, true, false, nullptr);
- Graphics_markLeft (g, ymax, true, true, false, nullptr);
- }
- }
- void GaussianMixture_PCA_drawConcentrationEllipses (GaussianMixture me, PCA him, Graphics g, double scale, int confidence, char32 *label, integer d1, integer d2, double xmin, double xmax, double ymin, double ymax, int fontSize, int garnish) {
- if (my dimension != his dimension) {
- Melder_warning (U"Dimensions don't agree.");
- return;
- }
- int d1_inverted = 0, d2_inverted = 0;
- if (d1 < 0) {
- d1 = labs (d1);
- Eigen_invertEigenvector (him, d1);
- d1_inverted = 1;
- }
- if (d2 < 0) {
- d2 = labs (d2);
- Eigen_invertEigenvector (him, d2);
- d2_inverted = 1;
- }
- autoSSCPList thee = SSCPList_toTwoDimensions (my covariances->asSSCPList(), his eigenvectors.row(d1), his eigenvectors.row (d2));
- if (d1_inverted) {
- Eigen_invertEigenvector (him, d1);
- }
- if (d2_inverted) {
- Eigen_invertEigenvector (him, d2);
- }
- SSCPList_drawConcentrationEllipses (thee.get(), g, -scale, confidence, label, 1, 2, xmin, xmax, ymin, ymax, fontSize, 0);
- if (garnish) {
- char32 llabel [40];
- Graphics_drawInnerBox (g);
- Graphics_marksLeft (g, 2, true, true, false);
- Melder_sprint (llabel,40, U"pc ", d2);
- Graphics_textLeft (g, true, llabel);
- Graphics_marksBottom (g, 2, true, true, false);
- Melder_sprint (llabel,40, U"pc ", d1);
- Graphics_textBottom (g, true, llabel);
- }
- }
- void GaussianMixture_drawConcentrationEllipses (GaussianMixture me, Graphics g, double scale, int confidence, char32 *label, int pcaDirections, integer d1, integer d2, double xmin, double xmax, double ymin, double ymax, int fontSize, int garnish) {
- if (d1 == 0 && d2 == 0) {
- d1 = 1;
- d2 = 2;
- }
- if (labs (d1) > my dimension || labs (d2) > my dimension) {
- return;
- }
- if (! pcaDirections) {
- SSCPList_drawConcentrationEllipses (my covariances->asSSCPList(), g, -scale, confidence, label,
- labs (d1), labs (d2), xmin, xmax, ymin, ymax, fontSize, garnish);
- return;
- }
- autoPCA him = GaussianMixture_to_PCA (me);
- GaussianMixture_PCA_drawConcentrationEllipses (me, him.get(), g, scale, confidence, label, d1, d2,
- xmin, xmax, ymin, ymax, fontSize, garnish);
- }
- void GaussianMixture_initialGuess (GaussianMixture me, TableOfReal thee, double nSigmas, double ru_range) {
- try {
- autoCovariance cov_t = TableOfReal_to_Covariance (thee);
- // assume equal probabilities for mixture
- // assume equal covariance matrices
- // spread centroids on an ellips in pc1-pc2 plane?
- if (my dimension == 1) {
- double dm = 2.0 * sqrt (cov_t -> data [1] [1]) / my numberOfComponents;
- double m1 = cov_t -> centroid [1] - dm;
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance covi = my covariances->at [im];
- covi -> centroid [1] = m1;
- covi -> data [1] [1] = dm * dm;
- m1 += dm;
- covi -> numberOfObservations = thy numberOfRows / my numberOfComponents;
- }
- } else {
- autoPCA pca = SSCP_to_PCA (cov_t.get());
- autoSSCP s2d = SSCP_toTwoDimensions (cov_t.get(), pca -> eigenvectors.row(1), pca -> eigenvectors.row(2));
- autoConfiguration means2d = Configuration_create (my numberOfComponents, 2);
- double a, b, cs, sn;
- NUMeigencmp22 (s2d -> data [1] [1], s2d -> data [1] [2], s2d -> data [2] [2], & a, & b, & cs, & sn);
- a = nSigmas * sqrt (a);
- b = nSigmas * sqrt (b);
- double angle = 0.0, angle_inc = NUM2pi / my numberOfComponents;
- for (integer im = 1; im <= my numberOfComponents; im++, angle += angle_inc) {
- double xc = a * (1.0 + NUMrandomUniform (-ru_range, ru_range)) * cos (angle);
- double yc = b * (1.0 + NUMrandomUniform (-ru_range, ru_range)) * sin (angle);
- means2d -> data [im] [1] = s2d -> centroid [1] + xc * cs - yc * sn;
- means2d -> data [im] [2] = s2d -> centroid [2] + xc * sn + yc * cs;
- }
- // reconstruct the n-dimensional means from the 2-d from the eigenvectors
- autoTableOfReal means = PCA_Configuration_to_TableOfReal_reconstruct (pca.get(), means2d.get());
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance covi = my covariances->at [im];
- for (integer ic = 1; ic <= my dimension; ic ++) {
- covi -> centroid [ic] = means -> data [im] [ic];
- }
- covi -> numberOfObservations = thy numberOfRows / my numberOfComponents;
- }
- // Trick: use the new means to get the between SSCP,
- // if there is only one component the cov_b will be zero!
- autoCovariance cov_b = GaussianMixture_to_Covariance_between (me);
- double var_t = SSCP_getTotalVariance (cov_t.get());
- double var_b = SSCP_getTotalVariance (cov_b.get());
- if (var_b >= var_t) { // we have chosen our initial values too far out
- double scale = 0.9 * sqrt (var_t / var_b);
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance covi = my covariances->at [im];
- for (integer ic = 1; ic <= my dimension; ic ++) {
- covi -> centroid [ic] -= (1.0 - scale) * (covi -> centroid [ic] - cov_b -> centroid [ic]);
- }
- }
- cov_b = GaussianMixture_to_Covariance_between (me);
- }
- // Within variances are now (total - between) / numberOfComponents;
- for (integer ir = 1; ir <= my dimension; ir ++) {
- for (integer ic = ir; ic <= my dimension; ic ++) {
- double scalef = my numberOfComponents == 1 ? 1.0 : (var_b / var_t) / my numberOfComponents;
- cov_t -> data [ic] [ir] = cov_t -> data [ir] [ic] *= scalef;
- }
- }
- // Copy them
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- if (cov -> numberOfRows == 1) {
- for (integer ic = 1; ic <= my dimension; ic ++) {
- cov -> data [1] [ic] = cov_t -> data [ic] [ic];
- }
- } else {
- for (integer ir = 1; ir <= my dimension; ir ++) {
- for (integer ic = ir; ic <= my dimension; ic ++) {
- cov -> data [ir] [ic] = cov -> data [ic] [ir] = cov_t -> data [ir] [ic];
- }
- }
- }
- }
- }
- } catch (MelderError) {
- Melder_throw (me, U" & ", thee, U": no initial guess possible.");
- }
- }
- autoClassificationTable GaussianMixture_TableOfReal_to_ClassificationTable (GaussianMixture me, TableOfReal thee) {
- try {
- autoClassificationTable him = ClassificationTable_create (thy numberOfRows, my numberOfComponents);
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- SSCP_expandLowerCholesky (cov);
- TableOfReal_setColumnLabel (him.get(), im, Thing_getName (cov));
- }
- double ln2pid = -0.5 * my dimension * log (NUM2pi);
- autoNUMvector<double> lnN (1, my numberOfComponents);
- for (integer i = 1; i <= thy numberOfRows; i ++) {
- double psum = 0.0;
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- double dsq = NUMmahalanobisDistance (cov -> lowerCholesky.get(), thy data.row(i), cov -> centroid.get());
- lnN [im] = ln2pid - 0.5 * (cov -> lnd + dsq);
- psum += his data [i] [im] = my mixingProbabilities [im] * exp (lnN [im]);
- }
- if (psum == 0) { // p's might be too small (underflow), make the largest equal to sfmin
- double lnmax = -1e308;
- integer imm = 1;
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- if (lnN [im] > lnmax) {
- lnmax = lnN [im];
- } imm = im;
- }
- his data [i] [imm] = NUMfpp -> sfmin;
- }
- // for (integer im = 1; im <= my numberOfComponents; im++) { his data [i] [im] /= psum; }
- TableOfReal_setRowLabel (him.get(), i, thy rowLabels [i].get());
- }
- return him;
- } catch (MelderError) {
- Melder_throw (U"No ClassificationTable created from GaussianMixture & TableOfReal.");
- }
- }
- void GaussianMixture_TableOfReal_getGammas (GaussianMixture me, TableOfReal thee, double **gamma, double *p_lnp) {
- try {
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- SSCP_expandLowerCholesky (cov);
- }
- double *nk = gamma [thy numberOfRows + 1];
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- nk [im] = 0.0;
- }
- double lnp = 0.0;
- double ln2pid = - 0.5 * my dimension * log (NUM2pi);
- autoVEC lnN = VECraw (my numberOfComponents);
- for (integer i = 1; i <= thy numberOfRows; i ++) {
- double rowsum = 0.0;
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- double dsq = NUMmahalanobisDistance (cov -> lowerCholesky.get(), thy data.row(i), cov -> centroid.get());
- lnN [im] = ln2pid - 0.5 * (cov -> lnd + dsq);
- gamma [i] [im] = my mixingProbabilities [im] * exp (lnN [im]); // eq. Bishop 9.16
- rowsum += gamma [i] [im];
- }
- // If the gamma [i]'s are too small, their sum will be zero and the scaling will overflow
- if (rowsum == 0.0) {
- continue; // This is ok because gamma [i]'s will all be zero
- }
- // scale gamma and get log(likehood) (Bishop eq. 9.40)
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- gamma [i] [im] /= rowsum; // eq. Bishop 9.16
- nk [im] += gamma [i] [im]; // eq. Bishop 9.18
- lnp += gamma [i] [im] * (log (my mixingProbabilities [im]) + lnN [im]); // eq. Bishop 9.40
- }
- }
- if (p_lnp) {
- *p_lnp = lnp;
- }
- } catch (MelderError) {
- Melder_throw (me, U" & ", thee, U": no gammas.");
- }
- }
- void GaussianMixture_splitComponent (GaussianMixture me, integer component) {
- try {
- Melder_require (component > 0 && component <= my numberOfComponents, U"The component should be in [1, ", my numberOfComponents, U"].");
-
- Covariance thee = my covariances->at [component];
- // Always new PCA because we cannot be sure of data unchanged.
- SSCP_expandPCA (thee);
- autoCovariance cov1 = Data_copy (thee);
- autoCovariance cov2 = Data_copy (thee);
- SSCP_unExpandPCA (cov1.get());
- SSCP_unExpandPCA (cov2.get());
- // Eventually cov1 replaces component, cov2 at end
- autoVEC mixingProbabilities = VECraw (my numberOfComponents + 1);
- for (integer i = 1; i <= my numberOfComponents; i ++) {
- mixingProbabilities [i] = my mixingProbabilities [i];
- }
- double gamma = 0.5, lambda = 0.5, eta = 0.5, mu = 0.5;
- mixingProbabilities [component] = gamma * my mixingProbabilities [component];
- mixingProbabilities [my numberOfComponents + 1] = (1.0 - gamma) * my mixingProbabilities [component];
- double mp12 = mixingProbabilities [component] / mixingProbabilities [my numberOfComponents + 1];
- double factor1 = (eta - eta * lambda * lambda - 1.0) / gamma + 1.0;
- double factor2 = (eta * lambda * lambda - eta - lambda * lambda) / (1.0 - gamma) + 1.0;
- double *ev = thy pca -> eigenvectors [1];
- double d2 = thy pca -> eigenvalues [1];
- for (integer i = 1; i <= my dimension; i ++) {
- cov1 -> centroid [i] -= (1.0 / sqrt (mp12)) * sqrt (d2) * mu * ev [i];
- cov2 -> centroid [i] += sqrt (mp12) * sqrt (d2) * mu * ev [i];
- if (thy numberOfRows == 1) { // diagonal
- cov1 -> data [1] [i] = cov1 -> data [1] [i] / mp12 + factor1 * d2;
- cov1 -> data [1] [i] = cov2 -> data [i] [i] * mp12 + factor2 * d2;
- } else {
- for (integer j = i; j <= my dimension; j++) {
- cov1 -> data [j] [i] = cov1 -> data [i] [j] = cov1 -> data [i] [j] / mp12 + factor1 * d2 * ev [i] * ev [j];
- cov2 -> data [j] [i] = cov2 -> data [i] [j] = cov2 -> data [i] [j] * mp12 + factor2 * d2 * ev [i] * ev [j];
- }
- }
- }
- cov1 -> numberOfObservations *= gamma;
- cov2 -> numberOfObservations *= 1.0 - gamma;
- // Replace cov1 at component + add cov2. If something goes wrong we should be able to restore original!
- try {
- Thing_setName (cov2.get(), Melder_cat (Thing_getName (cov2.get()), U"-", my numberOfComponents + 1));
- my covariances -> addItem_move (cov2.move());
- } catch (MelderError) {
- Melder_throw (me, U" cannot add new component.");
- }
- my covariances -> replaceItem_move (cov1.move(), component);
- my numberOfComponents ++;
- my mixingProbabilities = mixingProbabilities.move();
- } catch (MelderError) {
- Melder_throw (me, U": component ", component, U" cannot be split.");
- }
- }
- int GaussianMixture_TableOfReal_getProbabilities (GaussianMixture me, TableOfReal thee, integer component, double **p) {
- try {
- double ln2pid = my dimension * log (NUM2pi);
- // Update only one component or all?
- integer icb = 1, ice = my numberOfComponents;
- if (component > 0 && component <= my numberOfComponents) {
- icb = ice = component;
- }
- for (integer ic = icb; ic <= ice; ic ++) {
- Covariance him = my covariances->at [ic];
- SSCP_expandLowerCholesky (him);
- for (integer i = 1; i <= thy numberOfRows; i++) {
- double dsq = NUMmahalanobisDistance (his lowerCholesky.get(), thy data.row(i), his centroid.get());
- double prob = exp (- 0.5 * (ln2pid + his lnd + dsq));
- prob = prob < 1e-300 ? 1e-300 : prob; // prevent p from being zero
- p [i] [ic] = prob;
- }
- }
- GaussianMixture_updateProbabilityMarginals (me, p, thy numberOfRows);
- return 1;
- } catch (MelderError) {
- Melder_throw (me, U" & ", thee, U": no probabilies could be calculated.");
- }
- }
- void GaussianMixture_expandPCA (GaussianMixture me) {
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance him = my covariances->at [im];
- Melder_require (his numberOfRows > 1, U"Nothing to expand.");
- his pca = SSCP_to_PCA (him);
- }
- }
- void GaussianMixture_unExpandPCA (GaussianMixture me) {
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- SSCP_unExpandPCA (my covariances->at [im]);
- }
- }
- void GaussianMixture_TableOfReal_improveLikelihood (GaussianMixture me, TableOfReal thee, double delta_lnp, integer maxNumberOfIterations, double lambda, int criterion) {
- try {
- conststring32 criterionText = GaussianMixture_criterionText (criterion);
- // The global covariance matrix is added with scaling coefficient lambda during updating the
- // mixture covariances to prevent numerical instabilities.
- autoCovariance covg = TableOfReal_to_Covariance (thee);
- autoNUMmatrix<double> pp (1, thy numberOfRows + 1, 1, my numberOfComponents + 1);
- double *nk = pp [thy numberOfRows + 1]; // last row has the column marginals n(k)
- if (! GaussianMixture_TableOfReal_getProbabilities (me, thee, 0, pp.peek())) {
- Melder_throw (U"Iteration not started. Too many components?");
- }
- double lnp = GaussianMixture_getLikelihoodValue (me, pp.peek(), thy numberOfRows, criterion);
- integer iter = 0;
- autoMelderProgress progress (U"Improve likelihood...");
- try {
- double lnp_prev, lnp_start = lnp / thy numberOfRows;
- do {
- // E-step: get responsabilities (gamma) with current parameters
- // See C. Bishop (2006), Pattern reconition and machine learning, Springer, page 439...
- lnp_prev = lnp;
- iter ++;
- // M-step: 1. new means & covariances
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- GaussianMixture_updateCovariance (me, im, thy data.at, thy numberOfRows, pp.peek());
- GaussianMixture_addCovarianceFraction (me, im, covg.get(), lambda);
- }
- // M-step: 2. new mixingProbabilities
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- my mixingProbabilities [im] = nk [im] / thy numberOfRows;
- }
- if (! GaussianMixture_TableOfReal_getProbabilities (me, thee, 0, pp.peek())) {
- break;
- }
- lnp = GaussianMixture_getLikelihoodValue (me, pp.peek(), thy numberOfRows, criterion);
- Melder_progress ((double) iter / (double) maxNumberOfIterations, criterionText, U": ", lnp / thy numberOfRows, U", L0: ", lnp_start);
- } while (fabs ((lnp - lnp_prev) / lnp_prev) > delta_lnp && iter < maxNumberOfIterations);
- } catch (MelderError) {
- Melder_clearError ();
- }
- // During EM, covariances were underestimated by a factor of (n-1)/n. Correction now.
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- if (cov -> numberOfObservations > 1.5) {
- if (cov -> numberOfRows == 1) {
- for (integer j = 1; j <= thy numberOfColumns; j ++) {
- cov -> data [1] [j] *= cov -> numberOfObservations / (cov -> numberOfObservations - 1);
- }
- } else {
- for (integer j = 1; j <= thy numberOfColumns; j ++)
- for (integer k = j; k <= thy numberOfColumns; k ++) {
- cov -> data [j] [k] = cov -> data [k] [j] *= cov -> numberOfObservations / (cov -> numberOfObservations - 1.0);
- }
- }
- }
- }
- } catch (MelderError) {
- Melder_throw (me, U" & ", thee, U": likelihood cannot be improved.");
- }
- }
- integer GaussianMixture_getNumberOfParametersInComponent (GaussianMixture me) {
- Melder_assert (my covariances->size > 0);
- Covariance thee = my covariances->at [1];
- // if diagonal) d (means) + d (stdev)
- // else n + n(n+1)/2
- return thy numberOfRows == 1 ? 2 * thy numberOfColumns : thy numberOfColumns * (thy numberOfColumns + 3) / 2;
- }
- void GaussianMixture_updateProbabilityMarginals (GaussianMixture me, double **p, integer numberOfRows) {
- integer nocp1 = my numberOfComponents + 1, norp1 = numberOfRows + 1;
- for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
- p [norp1] [ic] = 0.0;
- }
- for (integer i = 1; i <= numberOfRows; i ++) {
- double rowsum = 0.0;
- for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
- rowsum += my mixingProbabilities [ic] * p [i] [ic];
- }
- p [i] [nocp1] = rowsum;
- for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
- p [norp1] [ic] += my mixingProbabilities [ic] * p [i] [ic] / p [i] [nocp1];
- }
- }
- }
- void GaussianMixture_removeComponent_bookkeeping (GaussianMixture me, integer component, double **p, integer numberOfRows) {
- // p is (NumberOfRows+1) by (numberOfComponents+1) !
- for (integer i = 1; i <= numberOfRows + 1; i ++) {
- for (integer ic = component; ic <= my numberOfComponents; ic ++) {
- p [i] [ic] = p [i] [ic + 1];
- }
- }
- GaussianMixture_updateProbabilityMarginals (me, p, numberOfRows);
- GaussianMixture_removeComponent (me, component);
- }
- double GaussianMixture_TableOfReal_getLikelihoodValue (GaussianMixture me, TableOfReal thee, int criterion) {
- double value = undefined;
- autoNUMmatrix<double> pp (1, thy numberOfRows + 1, 1, my numberOfComponents + 1);
- if (GaussianMixture_TableOfReal_getProbabilities (me, thee, 0, pp.peek())) {
- value = GaussianMixture_getLikelihoodValue (me, pp.peek(), thy numberOfRows, criterion);
- }
- return value;
- }
- double GaussianMixture_getLikelihoodValue (GaussianMixture me, double **p, integer numberOfRows, int criterion) {
- // Because we try to _maximize_ a criterion, all criteria are negative numbers.
- if (criterion == GaussianMixture_CD_LIKELIHOOD) {
- double lnpcd = 0.0;
- for (integer i = 1; i <= numberOfRows; i ++) {
- double psum = 0, lnsum = 0;
- for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
- double pp = my mixingProbabilities [ic] * p [i] [ic];
- psum += pp;
- lnsum += pp * log (pp);
- }
- if (psum > 0) {
- lnpcd += lnsum / psum;
- }
- }
- return lnpcd;
- }
- // The common factor for all other criteria is the log(likelihood)
- double lnp = 0.0;
- for (integer i = 1; i <= numberOfRows; i ++) {
- double psum = 0.0;
- for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
- psum += my mixingProbabilities [ic] * p [i] [ic];
- }
- if (psum > 0.0) {
- lnp += log (psum);
- }
- }
- if (criterion == GaussianMixture_LIKELIHOOD) {
- return lnp;
- }
- double npars = GaussianMixture_getNumberOfParametersInComponent (me), np = npars * my numberOfComponents;
- if (criterion == GaussianMixture_MML) {
- /* Equation (15) in
- Mario A.T. Figueiredo, and Anil K. Jain, Unsupervised Learning of Finite Mixture Models :
- IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE, VOL. 24, NO. 3, MARCH 2002
- L(theta,Y)= N/2*sum(m=1..k, log(n*alpha [k]/12)) +k/2*ln(n/12) +k(N+1)/2
- - log (sum(i=1..n, sum(m=1..k, alpha [k]*p(k))))
- */
- double logmpn = 0.0;
- for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
- logmpn += log (my mixingProbabilities [ic]);
- }
- // a rewritten L(theta,Y) is
- return lnp - 0.5 * my numberOfComponents * (npars + 1) * (log (numberOfRows / 12.0) + 1.0)
- + 0.5 * npars * logmpn;
- } else if (criterion == GaussianMixture_BIC) {
- return 2.0 * lnp - np * log (numberOfRows);
- } else if (criterion == GaussianMixture_AIC) {
- return 2.0 * (lnp - np);
- } else if (criterion == GaussianMixture_AICC) {
- np = npars * my numberOfComponents;
- return 2.0 * (lnp - np * (numberOfRows / (numberOfRows - np - 1.0)));
- }
- return lnp;
- }
- autoGaussianMixture GaussianMixture_TableOfReal_to_GaussianMixture_CEMM (GaussianMixture gm, TableOfReal thee, integer minNumberOfComponents, double delta_l, integer maxNumberOfIterations, double lambda, int criterion) {
- try {
- conststring32 criterionText = GaussianMixture_criterionText (criterion);
- bool deleteWeakComponents = ( minNumberOfComponents > 0 );
- autoGaussianMixture me = Data_copy (gm);
- autoNUMmatrix<double> p (1, thy numberOfRows + 2, 1, my numberOfComponents + 1);
- double *gsum = p [thy numberOfRows + 1]; // convenience array with sums
- autoCovariance covg = TableOfReal_to_Covariance (thee);
- double npars = GaussianMixture_getNumberOfParametersInComponent (me.get());
- double nparsd2 = ( deleteWeakComponents ? npars / 2.0 : 0.0 );
- // Initial E-step: Update all p's.
- GaussianMixture_TableOfReal_getProbabilities (me.get(), thee, 0, p.peek());
- double lnew = GaussianMixture_getLikelihoodValue (me.get(), p.peek(), thy numberOfRows, criterion);
- autoMelderProgress progress (U"Gaussian mixture...");
- autoGaussianMixture best;
- try {
- double lstart = lnew / thy numberOfRows;
- integer iter = 0, component;
- double lmax = -1e308, lprev;
- while (my numberOfComponents >= minNumberOfComponents) {
- do {
- iter ++;
- component = 1;
- lprev = lnew;
- while (component <= my numberOfComponents) {
- // M-step for means and covariances
- GaussianMixture_updateProbabilityMarginals (me.get(), p.peek(), thy numberOfRows);
- GaussianMixture_updateCovariance (me.get(), component, thy data.at, thy numberOfRows, p.peek());
- if (lambda > 0) {
- GaussianMixture_addCovarianceFraction (me.get(), component, covg.get(), lambda);
- }
- // Now check if enough support for a component exists
- double support_im = gsum [component] - nparsd2, support = 0.0;
- for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
- double support_ic = gsum [ic] - nparsd2;
- if (support_ic > 0.0) {
- support += support_ic;
- }
- }
- my mixingProbabilities [component] = ( support_im > 0.0 ? support_im : 0.0 );
- if (support > 0.0) {
- my mixingProbabilities [component] /= support;
- }
- NUMdvector_scaleAsProbabilities (my mixingProbabilities.at, my numberOfComponents);
- if (my mixingProbabilities [component] > 0.0) { // update p for component
- GaussianMixture_TableOfReal_getProbabilities (me.get(), thee, component, p.peek());
- component ++;
- } else {
- // "Remove" the component column from p by shifting row values
- GaussianMixture_removeComponent_bookkeeping (me.get(), component, p.peek(), thy numberOfRows);
- // Now numberOfComponents is one less!
- // MelderInfo_writeLine (U"Removed component ", component);
- }
- }
- // L(theta,Y)=N/2 sum(m=1..k, log(n*mixingP [m]/12))+k/2log(n/12)+k/2(N+1)-loglikelihood reduces to:
- // k/2 (N+1){log(n/12)+1}+N/2sum(m=1..k,mixingP [m]) - loglikelihood
- lnew = GaussianMixture_getLikelihoodValue (me.get(), p.peek(), thy numberOfRows, criterion);
- Melder_progress ((double) iter / (double) maxNumberOfIterations, U", ", criterionText, U": ",
- lnew / thy numberOfRows, U"\nComponents: ", my numberOfComponents, U"\nL0: ", lstart);
- } while (lnew > lprev && fabs ((lprev - lnew) / lnew) > delta_l && iter < maxNumberOfIterations);
- if (lnew > lmax) {
- best = Data_copy (me.get());
- lmax = lnew;
- if (! deleteWeakComponents) {
- break; // TODO was got end; is dat hetzelfde?
- }
- }
- if (my numberOfComponents > 1) { // remove smallest component
- component = 1;
- double mpmin = my mixingProbabilities [component];
- for (integer ic = 2; ic <= my numberOfComponents; ic ++) {
- if (my mixingProbabilities [ic] < mpmin) {
- mpmin = my mixingProbabilities [ic];
- component = ic;
- }
- }
- GaussianMixture_removeComponent_bookkeeping (me.get(), component, p.peek(), thy numberOfRows);
- } else {
- break;
- }
- }
- } catch (MelderError) {
- Melder_clearError ();
- }
- return best;
- } catch (MelderError) {
- Melder_throw (U"GaussianMixture not improved.");
- }
- }
- // The numberOfElemnts per covariance needs to be updated later
- void GaussianMixture_removeComponent (GaussianMixture me, integer component) {
- if (component < 1 || component > my numberOfComponents || my numberOfComponents == 1) {
- return;
- }
- my covariances -> removeItem (component);
- my numberOfComponents --;
- for (integer ic = component; ic <= my numberOfComponents; ic ++) {
- my mixingProbabilities [ic] = my mixingProbabilities [ic + 1];
- }
- NUMdvector_scaleAsProbabilities (my mixingProbabilities.at, my numberOfComponents);
- }
- autoGaussianMixture TableOfReal_to_GaussianMixture (TableOfReal me, integer numberOfComponents, double delta_lnp, integer maxNumberOfIterations, double lambda, int storage, int criterion) {
- try {
- Melder_require (my numberOfRows >= 2 * numberOfComponents,
- U"The number of data points should at least be twice the number of components.");
- autoGaussianMixture thee = GaussianMixture_create (numberOfComponents, my numberOfColumns, storage);
- GaussianMixture_setLabelsFromTableOfReal (thee.get(), me);
- GaussianMixture_initialGuess (thee.get(), me, 1.0, 0.05);
- if (maxNumberOfIterations <= 0)
- return thee;
- GaussianMixture_TableOfReal_improveLikelihood (thee.get(), me, delta_lnp, maxNumberOfIterations, lambda, criterion);
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": no GaussianMixture created.");
- }
- }
- autoCorrelation GaussianMixture_TableOfReal_to_Correlation (GaussianMixture me, TableOfReal thee) {
- try {
- Melder_require (my dimension == thy numberOfColumns,
- U"Dimensions should be equal.");
- autoClassificationTable ct = GaussianMixture_TableOfReal_to_ClassificationTable (me, thee);
- autoCorrelation him = ClassificationTable_to_Correlation_columns (ct.get());
- return him;
- } catch (MelderError) {
- Melder_throw (U"Correlation not created from GaussianMixture & TableOfReal.");
- }
- }
- double GaussianMixture_getProbabilityAtPosition_string (GaussianMixture me, conststring32 vector_string) {
- autostring32vector vector = STRVECtokenize (vector_string);
- autoVEC pos = {my dimension, kTensorInitializationType::ZERO};
- for (integer i = 1; i <= vector.size; i ++) {
- pos [i] = Melder_atof (vector [i].get());
- if (i == my dimension)
- break;
- }
- double p = GaussianMixture_getProbabilityAtPosition (me, pos.get());
- return p;
- }
- double GaussianMixture_getMarginalProbabilityAtPosition (GaussianMixture me, constVEC pos, double x) {
- longdouble p = 0.0;
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- double pim = Covariance_getMarginalProbabilityAtPosition (my covariances->at [im], pos, x);
- p += my mixingProbabilities [im] * pim;
- }
- return (double) p;
- }
- double GaussianMixture_getProbabilityAtPosition (GaussianMixture me, constVEC xpos) {
- longdouble p = 0.0;
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- double pim = Covariance_getProbabilityAtPosition (my covariances->at [im], xpos);
- p += my mixingProbabilities [im] * pim;
- }
- return (double) p;
- }
- autoMatrix GaussianMixture_PCA_to_Matrix_density (GaussianMixture me, PCA thee, integer d1, integer d2, double xmin, double xmax, integer nx, double ymin, double ymax, integer ny) {
- try {
- Melder_require (my dimension == thy dimension, U"Dimensions should be equal.");
- Melder_require (d1 <= thy numberOfEigenvalues && d2 <= thy numberOfEigenvalues, U"Direction index too high.");
-
- autoVEC v = { my dimension, kTensorInitializationType::ZERO };
- if (xmax == xmin || ymax == ymin) {
- double xmind, xmaxd, ymind, ymaxd, nsigmas = 2.0;
- GaussianMixture_PCA_getIntervalsAlongDirections (me, thee, d1, d2, nsigmas, &xmind, &xmaxd, &ymind, &ymaxd);
- if (xmax == xmin) {
- xmin = xmind;
- xmax = xmaxd;
- }
- if (ymax == ymin) {
- ymin = ymind;
- ymax = ymaxd;
- }
- }
- // xmin,xmax and ymin,ymax are coordinates in the pc1 vs pc2 plane
- double dx = fabs (xmax - xmin) / nx, dy = fabs (ymax - ymin) / ny;
- double x1 = xmin + 0.5 * dx, y1 = ymin + 0.5 * dy;
- autoMatrix him = Matrix_create (xmin, xmax, nx, dx, x1, ymin, ymax, ny, dy, y1);
- for (integer i = 1; i <= ny; i ++) {
- double y = y1 + (i - 1) * dy;
- for (integer j = 1; j <= nx; j ++) {
- double x = x1 + (j - 1) * dx;
- for (integer k = 1; k <= my dimension; k ++) {
- v [k] = x * thy eigenvectors [d1] [k] + y * thy eigenvectors [d2] [k];
- }
- his z [i] [j] = GaussianMixture_getProbabilityAtPosition (me, v.get());
- }
- }
- return him;
- } catch (MelderError) {
- Melder_throw (me, U" & ", thee, U": no Matrix density created.");
- }
- }
- autoTableOfReal GaussianMixture_to_TableOfReal_randomSampling (GaussianMixture me, integer numberOfPoints) {
- try {
- Covariance cov = my covariances->at [1];
- autoTableOfReal thee = TableOfReal_create (numberOfPoints, my dimension);
- autoVEC buf (my dimension, kTensorInitializationType::RAW);
- thy columnLabels. copyElementsFrom_upTo (cov -> columnLabels.get(), my dimension);
- // ppgb FIXME: is the number of column labels in the covariance equal to the number of dimensions? If so, document or assert.
- for (integer i = 1; i <= numberOfPoints; i ++) {
- char32 *covname;
- GaussianMixture_generateOneVector_inline (me, thy data.row (i), & covname, buf.get());
- TableOfReal_setRowLabel (thee.get(), i, covname);
- }
- GaussianMixture_unExpandPCA (me);
- return thee;
- } catch (MelderError) {
- GaussianMixture_unExpandPCA (me);
- Melder_throw (U"TableOfReal with random sampling not created.");
- }
- }
- autoTableOfReal GaussianMixture_TableOfReal_to_TableOfReal_BHEPNormalityTests (GaussianMixture me, TableOfReal thee, double h) {
- try {
- integer n = thy numberOfRows, d = thy numberOfColumns, nocp1 = my numberOfComponents + 1;
-
- Melder_require (d == my dimension, U"Dimensions should agree.");
-
- // We cannot use a classification table because this could weigh a far-off data point with high probability
- autoNUMmatrix<double> p (1, thy numberOfRows + 1, 1, my numberOfComponents + 1);
- GaussianMixture_TableOfReal_getProbabilities (me, thee, 0, p.peek());
- // prob, beta, tnbo, lnmu, lnvar, ndata, ncol
- autoTableOfReal him = TableOfReal_create (my numberOfComponents, 7);
- // labels
- integer iprob = 1, ih = 2, itnb = 3, ilnmu = 4, ilnvar = 5, indata = 6, id = 7;
- conststring32 label [8] = { U"", U"p", U"h", U"tnb", U"lnmu", U"lnvar", U"ndata", U"d" };
- for (integer icol = 1; icol <= 7; icol ++) {
- TableOfReal_setColumnLabel (him.get(), icol, label [icol]);
- }
- for (integer irow = 1; irow <= my numberOfComponents; irow ++) {
- Covariance cov = my covariances->at [irow];
- TableOfReal_setRowLabel (him.get(), irow, Thing_getName (cov));
- }
- for (integer icol = 1 ; icol <= my numberOfComponents; icol ++) {
- his data [icol] [indata] = p [n + 1] [icol];
- }
- for (integer im = 1; im <= my numberOfComponents; im ++) {
- Covariance cov = my covariances->at [im];
- double mixingP = my mixingProbabilities [im];
- double nd = his data [im] [indata], d2 = d / 2.0;
- double beta = h > 0.0 ? NUMsqrt1_2 / h : NUMsqrt1_2 * pow ( (1.0 + 2.0 * d) / 4.0, 1.0 / (d + 4.0)) * pow (nd, 1.0 / (d + 4.0));
- double beta2 = beta * beta, beta4 = beta2 * beta2, beta8 = beta4 * beta4;
- double gamma = 1.0 + 2.0 * beta2, gamma2 = gamma * gamma, gamma4 = gamma2 * gamma2;
- double delta = 1.0 + beta2 * (4.0 + 3.0 * beta2), delta2 = delta * delta;
- double mu = 1.0 - pow (gamma, -d2) * (1.0 + d * beta2 / gamma + d * (d + 2.0) * beta4 / (2.0 * gamma2));
- double var = 2.0 * pow (1.0 + 4.0 * beta2, -d2)
- + 2.0 * pow (gamma, -d) * (1.0 + 2.0 * d * beta4 / gamma2 + 3.0 * d * (d + 2.0) * beta8 / (4.0 * gamma4))
- - 4.0 * pow (delta, -d2) * (1.0 + 3.0 * d * beta4 / (2.0 * delta) + d * (d + 2.0) * beta8 / (2.0 * delta2));
- double mu2 = mu * mu;
- double prob = undefined, tnb = undefined, lnmu = undefined, lnvar = undefined;
- try {
- SSCP_expandLowerCholesky (cov);
- } catch (MelderError) {
- tnb = 4.0 * nd;
- }
- double djk, djj, sumjk = 0.0, sumj = 0.0;
- double b1 = beta2 / 2.0, b2 = b1 / (1.0 + beta2);
- /* Heinze & Wagner (1997), page 3
- We use d [j] [k] = ||Y [j]-Y [k]||^2 = (Y [j]-Y [k])'S^(-1)(Y [j]-Y [k])
- So d [j] [k]= d [k] [j] and d [j] [j] = 0
- */
- for (integer j = 1; j <= n; j ++) {
- double wj = p [j] [nocp1] > 0.0 ? mixingP * p [j] [im] / p [j] [nocp1] : 0.0;
- for (integer k = 1; k < j; k ++) {
- djk = NUMmahalanobisDistance_chi (cov -> lowerCholesky.at, thy data [j], thy data [k], d, d);
- double w = p [k] [nocp1] > 0.0 ? wj * mixingP * p [k] [im] / p [k] [nocp1] : 0.0;
- sumjk += 2.0 * w * exp (-b1 * djk); // factor 2 because d [j] [k] == d [k] [j]
- }
- sumjk += wj * wj; // for k == j. Is this ok now for probability weighing ????
- djj = NUMmahalanobisDistance (cov -> lowerCholesky.get(), thy data.row(j), cov -> centroid.get());
- sumj += wj * exp (-b2 * djj);
- }
- tnb = (1.0 / nd) * sumjk - 2.0 * pow (1.0 + beta2, - d2) * sumj + nd * pow (gamma, - d2); // n *
- his data [im] [ilnmu] = lnmu = 0.5 * log (mu2 * mu2 / (mu2 + var)); //log (sqrt (mu2 * mu2 /(mu2 + var)));
- his data [im] [ilnvar] = lnvar = sqrt (log ( (mu2 + var) / mu2));
- his data [im] [iprob] = prob = NUMlogNormalQ (tnb, lnmu, lnvar);
- his data [im] [ih] = NUMsqrt1_2 / beta;
- his data [im] [id] = d;
- his data [im] [itnb] = tnb;
- }
- return him;
- } catch (MelderError) {
- Melder_throw (U"TableOfReal for BHEP not created.");
- }
- }
- /* End of file GaussianMixture.cpp 1555*/
|