emily
/
praat


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408
							/* GaussianMixture.cpp
 *
 * Copyright (C) 2011-2018 David Weenink
 *
 * This code is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This code is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this work. If not, see <http://www.gnu.org/licenses/>.
 */

/*
  djmw 20101021 Initial version
*/
#include "Distributions_and_Strings.h"
#include "GaussianMixture.h"
#include "NUMmachar.h"
#include "NUM2.h"
#include "Strings_extensions.h"

#include "oo_DESTROY.h"
#include "GaussianMixture_def.h"
#include "oo_COPY.h"
#include "GaussianMixture_def.h"
#include "oo_EQUAL.h"
#include "GaussianMixture_def.h"
#include "oo_CAN_WRITE_AS_ENCODING.h"
#include "GaussianMixture_def.h"
#include "oo_WRITE_TEXT.h"
#include "GaussianMixture_def.h"
#include "oo_READ_TEXT.h"
#include "GaussianMixture_def.h"
#include "oo_WRITE_BINARY.h"
#include "GaussianMixture_def.h"
#include "oo_READ_BINARY.h"
#include "GaussianMixture_def.h"
#include "oo_DESCRIPTION.h"
#include "GaussianMixture_def.h"

Thing_implement (GaussianMixture, Daata, 0);

conststring32 GaussianMixture_criterionText (int criterion) {
	conststring32 criterionText [6] =  { U"(1/n)*LLH", U"(1/n)*MML", U"(1/n)*BIC", U"(1/n)*AIC", U"(1/n)*AICc", U"(1/n)*CD_LLH" };
	return criterion >= 0 && criterion < 7 ? criterionText [criterion] : U"(1/n)*ln(p)";
}

void GaussianMixture_removeComponent (GaussianMixture me, integer component);
void GaussianMixture_removeComponent_bookkeeping (GaussianMixture me, integer component, double **p, integer numberOfRows);
int GaussianMixture_TableOfReal_getProbabilities (GaussianMixture me, TableOfReal thee, integer component, double **p);
void GaussianMixture_TableOfReal_getGammas (GaussianMixture me, TableOfReal thee, double **gamma, double *lnp);
double GaussianMixture_getLikelihoodValue (GaussianMixture me, double **p, integer numberOfRows, int onlyLikelyhood);
void GaussianMixture_updateProbabilityMarginals (GaussianMixture me, double **p, integer numberOfRows);
integer GaussianMixture_getNumberOfParametersInComponent (GaussianMixture me);

static void NUMdvector_scaleAsProbabilities (double *v, integer n) {
	longdouble sum = 0.0;
	for (integer i = 1; i <= n; i ++) {
		sum += v [i];
	}
	if (sum > 0.0) {
		for (integer i = 1; i <= n; i ++) {
			v [i] /= sum;
		}
	}
}

static void GaussianMixture_updateCovariance (GaussianMixture me, integer component, double **data, integer numberOfRows, double **p) {
	if (component < 1 || component > my numberOfComponents) {
		return;
	}
	Covariance thee = my covariances->at [component];

	double mixprob = my mixingProbabilities [component];
	double gsum = p [numberOfRows + 1] [component];
	// update the means

	for (integer j = 1; j <= thy numberOfColumns; j ++) {
		thy centroid [j] = 0.0;
		for (integer i = 1; i <= numberOfRows; i ++) {
			double gamma = mixprob * p [i] [component] / p [i] [my numberOfComponents + 1];
			thy centroid [j] += gamma * data [i] [j] ; // eq. Bishop 9.17
		}
		thy centroid [j] /= gsum;
	}

	// update covariance with the new mean

	if (thy numberOfRows == 1) { // 1xn covariance
		for (integer j = 1; j <= thy numberOfColumns; j ++) {
			thy data [1] [j] = 0.0;
		}
		for (integer i = 1; i <= numberOfRows; i ++) {
			double gamma = mixprob * p [i] [component] / p [i] [my numberOfComponents + 1];
			double gdn = gamma / gsum;
			for (integer j = 1; j <= thy numberOfColumns; j ++) {
				double xj = thy centroid [j] - data [i] [j];
				thy data [1] [j] += gdn * xj * xj;
			}
		}
	} else { // nxn covariance
		for (integer j = 1; j <= thy numberOfRows; j ++)
			for (integer k = j; k <= thy numberOfColumns; k ++) {
				thy data [k] [j] = thy data [j] [k] = 0;
			}
		for (integer i = 1; i <= numberOfRows; i ++) {
			double gamma = mixprob * p [i] [component] / p [i] [my numberOfComponents + 1];
			double gdn = gamma / gsum; // we cannot divide by nk - 1, this could cause instability
			for (integer j = 1; j <= thy numberOfColumns; j ++) {
				double xj = thy centroid [j] - data [i] [j];
				for (integer k = j; k <= thy numberOfColumns; k ++) {
					thy data [j] [k] = thy data [k] [j] += gdn * xj * (thy centroid [k] - data [i] [k]);
				}
			}
		}
	}
	thy numberOfObservations = my mixingProbabilities [component] * numberOfRows;
}

static void GaussianMixture_addCovarianceFraction (GaussianMixture me, integer im, Covariance him, double fraction) {
	if (im < 1 || im > my numberOfComponents || fraction == 0) {
		return;
	}

	Covariance thee = my covariances->at [im];

	// prevent instability: add lambda fraction of global covariances

	if (thy numberOfRows == 1) {
		for (integer j = 1; j <= thy numberOfColumns; j ++) {
			thy data [1] [j] += fraction * his data [j] [j];
		}
	} else {
		for (integer j = 1; j <= thy numberOfColumns; j++) {
			for (integer k = j; k <= thy numberOfColumns; k++) {
				thy data [k] [j] = thy data [j] [k] += fraction * his data [j] [k];
			}
		}
	}
}

void structGaussianMixture :: v_info () {
	our structDaata :: v_info ();
	MelderInfo_writeLine (U"Number of components: ", our numberOfComponents);
	MelderInfo_writeLine (U"Dimension of component: ", our dimension);
	MelderInfo_writeLine (U"Mixing probabilities:");
	for (integer im = 1; im <= numberOfComponents; im ++) {
		MelderInfo_writeLine (U"  ", im, U": p = ", our mixingProbabilities [im], U"  Name =  \"", Thing_getName (our covariances->at [im]), U"\"");
	}
}

static void GaussianMixture_setLabelsFromTableOfReal (GaussianMixture me, TableOfReal thee) {
	for (integer im = 1; im <= my numberOfComponents; im ++) {
		Covariance cov = my covariances->at [im];
		for (integer j = 1; j <= my dimension; j ++) {
			TableOfReal_setColumnLabel (cov, j, thy columnLabels [j].get());
		}
	}
}

// only from big to reduced or same
static void Covariance_into_Covariance (Covariance me, Covariance thee) {
	Melder_require (my numberOfColumns == thy numberOfColumns, U"Dimensions should be equal.");
	
	SSCP_unExpand (thee); // to its original state

	thy numberOfObservations = my numberOfObservations;
	// copy centroid & column labels
	for (integer ic = 1; ic <= my numberOfColumns; ic ++) {
		thy centroid [ic] = my centroid [ic];
	}
	thy columnLabels. copyElementsFrom (my columnLabels.get());
	// Are the matrix sizes equal
	if (my numberOfRows == thy numberOfRows) {
		thy rowLabels. copyElementsFrom (my rowLabels.get());
		matrixcopy_preallocated (thy data.get(), my data.get());
		return;
	} else {
		for (integer ir = 1; ir <= my numberOfRows; ir ++) {
			for (integer ic = ir; ic <= my numberOfColumns; ic ++) {
				integer dij = ic - ir;
				if (dij < thy numberOfRows)
					thy data [dij + 1] [ic] = my data [ir] [ic];
			}
		}
	}
}

static void GaussianMixture_setDefaultMixtureNames (GaussianMixture me) {
	for (integer im = 1; im <= my numberOfComponents; im ++) {
		Covariance cov = my covariances->at [im];
		Thing_setName (cov, Melder_cat (U"m", im));
	}
}

autoGaussianMixture GaussianMixture_create (integer numberOfComponents, integer dimension, integer storage) {
	try {
		autoGaussianMixture me = Thing_new (GaussianMixture);
		my numberOfComponents = numberOfComponents;
		my dimension = dimension;
		my mixingProbabilities = VECzero (numberOfComponents);
		my covariances = CovarianceList_create ();
		for (integer im = 1; im <= numberOfComponents; im ++) {
			autoCovariance cov = Covariance_create_reduceStorage (dimension, storage);
			my covariances -> addItemAtPosition_move (cov.move(), im);
		}
		for (integer im = 1; im <= numberOfComponents; im ++) {
			my mixingProbabilities [im] = 1.0 / numberOfComponents;
		}
		GaussianMixture_setDefaultMixtureNames (me.get());
		return me;
	} catch (MelderError) {
		Melder_throw (U"GaussianMixture not created.");
	}
}

/* c is double vector 1..dimension !!!!!! */
int GaussianMixture_generateOneVector_inline (GaussianMixture me, VEC c, char32 **covname, VEC buf) {
	try {
		double p = NUMrandomUniform (0.0, 1.0);
		integer im = NUMgetIndexFromProbability (my mixingProbabilities.at, my numberOfComponents, p);
		Covariance thee = my covariances->at [im];
		*covname = thy name.get();   // BUG dangle
		if (thy numberOfRows == 1) { // 1xn reduced form
			for (integer i = 1; i <= my dimension; i ++) {
				c [i] = NUMrandomGauss (thy centroid [i], sqrt (thy data [1] [i]));
			}
		} else { // nxn
			if (! thy pca) {
				SSCP_expandPCA (thee);    // on demand expanding
			}
			Covariance_PCA_generateOneVector_inline (thee, thy pca.get(), c, buf);
		}
		return 1;
	} catch (MelderError) {
		Melder_throw (me, U": vector not generated.");
	}
}

autoGaussianMixture TableOfReal_to_GaussianMixture_fromRowLabels (TableOfReal me, integer storage) {
	try {
		autoStrings rowLabels = TableOfReal_extractRowLabels (me);
		autoDistributions dist = Strings_to_Distributions (rowLabels.get());
		integer numberOfComponents = dist -> numberOfRows;

		autoGaussianMixture thee = GaussianMixture_create (numberOfComponents, my numberOfColumns, storage);

		GaussianMixture_setLabelsFromTableOfReal (thee.get(), me);

		for (integer i = 1; i <= numberOfComponents; i ++) {
			autoTableOfReal tab = TableOfReal_extractRowsWhereLabel (me, kMelder_string::EQUAL_TO, dist -> rowLabels [i].get());
			autoCovariance cov = TableOfReal_to_Covariance (tab.get());
			Covariance_into_Covariance (cov.get(), thy covariances->at [i]);
			Thing_setName (thy covariances->at [i], dist -> rowLabels [i].get());
		}
		for (integer im = 1; im <= numberOfComponents; im ++) {
			thy mixingProbabilities [im] = dist -> data [im] [1] / my numberOfRows;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no GaussianMixture created.");
	}
}

autoCovariance GaussianMixture_to_Covariance_between (GaussianMixture me) {
	try {
		autoCovariance thee = Covariance_create (my dimension);

		//	First the new centroid, based on the mixture centroids

		double nobs_total = 0;
		for (integer i = 1; i <= my numberOfComponents; i ++) {
			Covariance him = my covariances->at [i];
			double nobs = his numberOfObservations; // the weighting factor
			for (integer ic = 1; ic <= my dimension; ic ++) {
				thy centroid [ic] += nobs * his centroid [ic];
			}
			nobs_total += nobs;
		}
		for (integer ic = 1; ic <= my dimension; ic ++) {
			thy centroid [ic] /= nobs_total;
		}

		Covariance cov = my covariances->at [1];
		for (integer i = 1; i <= thy numberOfColumns; i ++) {
			if (cov -> columnLabels [i]) {
				TableOfReal_setColumnLabel (thee.get(), i, cov -> columnLabels [i].get());
				TableOfReal_setRowLabel (thee.get(), i, cov -> columnLabels [i].get()); // if diagonal !
			}
		}

		// Between covariance, scale by the number of observations

		for (integer i = 1; i <= my numberOfComponents; i ++) {
			Covariance him = my covariances->at [i];
			double nobs = his numberOfObservations - 1; // we lose 1 degree of freedom
			for (integer ir = 1; ir <= my dimension; ir ++) {
				double dir = his centroid [ir] - thy centroid [ir];
				for (integer ic = ir; ic <= my dimension; ic ++) {
					double dic = his centroid [ic] - thy centroid [ic];
					thy data [ir] [ic] = thy data [ic] [ir] += nobs * dir * dic;
				}
			}
		}

		// Scale back

		for (integer ir = 1; ir <= my dimension; ir ++) {
			for (integer ic = ir; ic <= my dimension; ic ++) {
				thy data [ir] [ic] = thy data [ic] [ir] /= nobs_total;
			}
		}

		thy numberOfObservations = nobs_total;
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Covariance (between) created.");
	}
}

autoCovariance GaussianMixture_to_Covariance_within (GaussianMixture me) {
	try {
		autoCovariance thee = Covariance_create (my dimension);

		for (integer i = 1; i <= my numberOfComponents; i ++) {
			double p = my mixingProbabilities [i];
			Covariance him = my covariances->at [i];
			if (his numberOfRows == 1) {
				for (integer ic = 1; ic <= my dimension; ic ++) {
					thy data [ic] [ic] += p * his data [1] [ic];
				}
			} else {
				for (integer ir = 1; ir <= my dimension; ir ++) {
					for (integer ic = ir; ic <= my dimension; ic ++) {
						thy data [ir] [ic] = thy data [ic] [ir] += p * his data [ir] [ic];
					}
				}
			}
			thy numberOfObservations += his numberOfObservations - 1; // we lose a degree of freedom?
		}

		// Leave centroid at 0 so we can add the within and between covariance nicely
		// Copy row labels from columns, because covar might be diagonal
		TableOfReal_copyLabels (my covariances->at [1], thee.get(), -1, 1);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Covariance (within) created.");
	}
}

autoCovariance GaussianMixture_to_Covariance_total (GaussianMixture me) {
	try {
		autoCovariance thee = GaussianMixture_to_Covariance_between (me);
		autoCovariance within = GaussianMixture_to_Covariance_within (me);

		for (integer ir = 1; ir <= my dimension; ir++) {
			for (integer ic = ir; ic <= my dimension; ic++) {
				thy data [ir] [ic] = thy data [ic] [ir] += within -> data [ir] [ic];
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Covariance (total) created.");
	}
}

autoCovariance GaussianMixture_extractComponent (GaussianMixture me, integer component) {
	try {
		Melder_require (component > 0 && component <= my numberOfComponents, U"The component should be in [1, ", my numberOfComponents, U".");

		autoCovariance thee = Data_copy (my covariances->at [component]);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no component extracted.");
	}
}

autoTableOfReal GaussianMixture_extractMixingProbabilities (GaussianMixture me) {
	try {
		autoTableOfReal thee = TableOfReal_create (my numberOfComponents, 2);
		TableOfReal_setColumnLabel (thee.get(), 1, U"p");
		TableOfReal_setColumnLabel (thee.get(), 2, U"n");
		for (integer im = 1; im <= my numberOfComponents; im ++) {
			Covariance cov = my covariances->at [im];
			thy data [im] [1] = my mixingProbabilities [im];
			thy data [im] [2] = cov -> numberOfObservations;
			TableOfReal_setRowLabel (thee.get(), im, Thing_getName (cov));
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no mixing probabilities extracted.");
	}
}

autoTableOfReal GaussianMixture_extractCentroids (GaussianMixture me) {
	try {
		autoTableOfReal thee = TableOfReal_create (my numberOfComponents, my dimension);

		for (integer im = 1; im <= my numberOfComponents; im ++) {
			Covariance cov = my covariances->at [im];
			if (im == 1) {
				for (integer j = 1; j <= my dimension; j ++) {
					TableOfReal_setColumnLabel (thee.get(), j, cov -> columnLabels [j].get());
				}
			}
			TableOfReal_setRowLabel (thee.get(), im, Thing_getName (cov));
			for (integer j = 1; j <= my dimension; j ++) {
				thy data [im] [j] = cov -> centroid [j];
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no centroid extracted.");
	}
}

autoPCA GaussianMixture_to_PCA (GaussianMixture me) {
	try {
		autoCovariance him = GaussianMixture_to_Covariance_total (me);
		autoPCA thee = SSCP_to_PCA (him.get());
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no PCA calculated.");
	}
}

void GaussianMixture_getIntervalsAlongDirections (GaussianMixture me, integer d1, integer d2, double nsigmas, double *xmin, double *xmax, double *ymin, double *ymax) {
	*xmin = *xmax = *ymin = *ymax = undefined;
	Melder_require (d1 > 0 && d1 <= my dimension && d2 > 0 && d2 <= my dimension, U"Incorrect directions.");
	
	autoSSCPList sscps = SSCPList_extractTwoDimensions (my covariances->asSSCPList(), d1, d2);
	SSCPList_getEllipsesBoundingBoxCoordinates (sscps.get(), -nsigmas, 0, xmin, xmax, ymin, ymax);
}

void GaussianMixture_getIntervalAlongDirection (GaussianMixture me, integer d, double nsigmas, double *xmin, double *xmax) {
	double ymin, ymax;
	GaussianMixture_getIntervalsAlongDirections (me, d, d, nsigmas, xmin, xmax, &ymin, &ymax);
}

void GaussianMixture_PCA_getIntervalsAlongDirections (GaussianMixture me, PCA thee, integer d1, integer d2, double nsigmas, double *xmin, double *xmax, double *ymin, double *ymax) {
	Melder_require (my dimension == thy dimension, 
		U"Dimensions should be equal.");
	Melder_require (d1 > 0 && d1 <= my dimension && d2 > 0 && d2 <= my dimension, 
		U"Incorrect directions.");
	
	autoSSCPList sscps = SSCPList_toTwoDimensions (my covariances->asSSCPList(), thy eigenvectors.row (d1), thy eigenvectors.row (d2));
	SSCPList_getEllipsesBoundingBoxCoordinates (sscps.get(), -nsigmas, 0, xmin, xmax, ymin, ymax);
}


void GaussianMixture_PCA_getIntervalAlongDirection (GaussianMixture me, PCA thee, integer d, double nsigmas, double *xmin, double *xmax) {
	GaussianMixture_PCA_getIntervalsAlongDirections (me, thee, d, d, nsigmas, xmin, xmax, nullptr, nullptr);
}

void GaussianMixture_PCA_drawMarginalPdf (GaussianMixture me, PCA thee, Graphics g, integer d, double xmin, double xmax, double ymin, double ymax, integer npoints, integer nbins, int garnish) {
	if (my dimension != thy dimension || d < 1 || d > my dimension) {
		Melder_warning (U"Dimensions don't agree.");
		return;
	}
	if (npoints <= 1)
		npoints = 1000;
	double nsigmas = 2;
	if (xmax <= xmin)
		GaussianMixture_PCA_getIntervalAlongDirection (me, thee, d, nsigmas, & xmin, & xmax);
	double pmax = 0.0, dx = (xmax - xmin) / npoints, x1 = xmin + 0.5 * dx;
	double scalef = ( nbins <= 0 ? 1.0 : 1.0 ); // TODO
	autoVEC p = VECraw (npoints);
	for (integer i = 1; i <= npoints; i ++) {
		double x = x1 + (i - 1) * dx;
		Melder_assert (thy eigenvectors.ncol == thy dimension);
		p [i] = scalef * GaussianMixture_getMarginalProbabilityAtPosition (me, thy eigenvectors.row (d), x);
		if (p [i] > pmax)
			pmax = p [i];
	}
	if (ymin >= ymax) {
		ymin = 0.0;
		ymax = pmax;
	}
	Graphics_setInner (g);
	Graphics_setWindow (g, xmin, xmax, ymin, ymax);
	Graphics_function (g, p.at, 1, npoints, x1, xmax - 0.5 * dx);
	Graphics_unsetInner (g);
	if (garnish) {
		Graphics_drawInnerBox (g);
		Graphics_markBottom (g, xmin, true, true, false, nullptr);
		Graphics_markBottom (g, xmax, true, true, false, nullptr);
		Graphics_markLeft (g, ymin, true, true, false, nullptr);
		Graphics_markLeft (g, ymax, true, true, false, nullptr);
	}
}

void GaussianMixture_drawMarginalPdf (GaussianMixture me, Graphics g, integer d, double xmin, double xmax, double ymin, double ymax, integer npoints, integer nbins, int garnish) {
	if (d < 1 || d > my dimension) {
		Melder_warning (U"Dimension doesn't agree.");
		return;
	}
	if (npoints <= 1) {
		npoints = 1000;
	}
	autoVEC p (npoints, kTensorInitializationType::RAW);
	autoVEC v (my dimension, kTensorInitializationType::RAW);

	double nsigmas = 2.0;
	if (xmax <= xmin) {
		GaussianMixture_getIntervalAlongDirection (me, d, nsigmas, &xmin, &xmax);
	}

	double pmax = 0.0, dx = (xmax - xmin) / (npoints - 1);
	double scalef = ( nbins <= 0 ? 1.0 : 1.0 ); // TODO
	
	for (integer i = 1; i <= npoints; i++) {
		double x = xmin + (i - 1) * dx;
		for (integer k = 1; k <= my dimension; k++) {
			v [k] = k == d ? 1 : 0;
		}
		p [i] = scalef * GaussianMixture_getMarginalProbabilityAtPosition (me, v.get(), x);
		if (p [i] > pmax) {
			pmax = p [i];
		}
	}
	if (ymin >= ymax) {
		ymin = 0;
		ymax = pmax;
	}

	Graphics_setInner (g);
	Graphics_setWindow (g, xmin, xmax, ymin, ymax);
	Graphics_function (g, p.at, 1, npoints, xmin, xmax);
	Graphics_unsetInner (g);

	if (garnish) {
		Graphics_drawInnerBox (g);
		Graphics_markBottom (g, xmin, true, true, false, nullptr);
		Graphics_markBottom (g, xmax, true, true, false, nullptr);
		Graphics_markLeft (g, ymin, true, true, false, nullptr);
		Graphics_markLeft (g, ymax, true, true, false, nullptr);
	}
}

void GaussianMixture_PCA_drawConcentrationEllipses (GaussianMixture me, PCA him, Graphics g, double scale, int confidence, char32 *label, integer d1, integer d2, double xmin, double xmax, double ymin, double ymax, int fontSize, int garnish) {
	if (my dimension != his dimension) {
		Melder_warning (U"Dimensions don't agree.");
		return;
	}
	int d1_inverted = 0, d2_inverted = 0;

	if (d1 < 0) {
		d1 = labs (d1);
		Eigen_invertEigenvector (him, d1);
		d1_inverted = 1;
	}
	if (d2 < 0) {
		d2 = labs (d2);
		Eigen_invertEigenvector (him, d2);
		d2_inverted = 1;
	}

	autoSSCPList thee = SSCPList_toTwoDimensions (my covariances->asSSCPList(), his eigenvectors.row(d1), his eigenvectors.row (d2));

	if (d1_inverted) {
		Eigen_invertEigenvector (him, d1);
	}
	if (d2_inverted) {
		Eigen_invertEigenvector (him, d2);
	}

	SSCPList_drawConcentrationEllipses (thee.get(), g, -scale, confidence, label, 1, 2, xmin, xmax, ymin, ymax, fontSize, 0);

	if (garnish) {
		char32 llabel [40];
		Graphics_drawInnerBox (g);
		Graphics_marksLeft (g, 2, true, true, false);
		Melder_sprint (llabel,40, U"pc ", d2);
		Graphics_textLeft (g, true, llabel);
		Graphics_marksBottom (g, 2, true, true, false);
		Melder_sprint (llabel,40, U"pc ", d1);
		Graphics_textBottom (g, true, llabel);
	}
}

void GaussianMixture_drawConcentrationEllipses (GaussianMixture me, Graphics g, double scale, int confidence, char32 *label, int pcaDirections, integer d1, integer d2, double xmin, double xmax, double ymin, double ymax, int fontSize, int garnish) {
	if (d1 == 0 && d2 == 0) {
		d1 = 1;
		d2 = 2;
	}
	if (labs (d1) > my dimension || labs (d2) > my dimension) {
		return;
	}

	if (! pcaDirections) {
		SSCPList_drawConcentrationEllipses (my covariances->asSSCPList(), g, -scale, confidence, label,
		                                  labs (d1), labs (d2), xmin, xmax, ymin, ymax, fontSize, garnish);
		return;
	}

	autoPCA him = GaussianMixture_to_PCA (me);

	GaussianMixture_PCA_drawConcentrationEllipses (me, him.get(), g, scale, confidence, label, d1, d2,
	        xmin, xmax, ymin, ymax, fontSize, garnish);
}

void GaussianMixture_initialGuess (GaussianMixture me, TableOfReal thee, double nSigmas, double ru_range) {
	try {
		autoCovariance cov_t = TableOfReal_to_Covariance (thee);

		// assume equal probabilities for mixture
		// assume equal covariance matrices
		// spread centroids on an ellips in pc1-pc2 plane?

		if (my dimension == 1) {
			double dm = 2.0 * sqrt (cov_t -> data [1] [1]) / my numberOfComponents;
			double m1 = cov_t -> centroid [1] - dm;
			for (integer im = 1; im <= my numberOfComponents; im ++) {
				Covariance covi = my covariances->at [im];
				covi -> centroid [1] = m1;
				covi -> data [1] [1] = dm * dm;
				m1 += dm;
				covi -> numberOfObservations = thy numberOfRows / my numberOfComponents;
			}
		} else {
			autoPCA pca = SSCP_to_PCA (cov_t.get());
			autoSSCP s2d = SSCP_toTwoDimensions (cov_t.get(), pca -> eigenvectors.row(1), pca -> eigenvectors.row(2));
			autoConfiguration means2d = Configuration_create (my numberOfComponents, 2);

			double a, b, cs, sn;
			NUMeigencmp22 (s2d -> data [1] [1], s2d -> data [1] [2], s2d -> data [2] [2], & a, & b, & cs, & sn);

			a = nSigmas * sqrt (a);
			b = nSigmas * sqrt (b);
			double angle = 0.0, angle_inc = NUM2pi / my numberOfComponents;
			for (integer im = 1; im <= my numberOfComponents; im++, angle += angle_inc) {
				double xc = a * (1.0 + NUMrandomUniform (-ru_range, ru_range)) * cos (angle);
				double yc = b * (1.0 + NUMrandomUniform (-ru_range, ru_range)) * sin (angle);
				means2d -> data [im] [1] = s2d -> centroid [1] + xc * cs - yc * sn;
				means2d -> data [im] [2] = s2d -> centroid [2] + xc * sn + yc * cs;
			}

			// reconstruct the n-dimensional means from the 2-d from the eigenvectors

			autoTableOfReal means = PCA_Configuration_to_TableOfReal_reconstruct (pca.get(), means2d.get());

			for (integer im = 1; im <= my numberOfComponents; im ++) {
				Covariance covi = my covariances->at [im];
				for (integer ic = 1; ic <= my dimension; ic ++) {
					covi -> centroid [ic] = means -> data [im] [ic];
				}
				covi -> numberOfObservations = thy numberOfRows / my numberOfComponents;
			}

			// Trick: use the new means to get the between SSCP,
			// if there is only one component the cov_b will be zero!

			autoCovariance cov_b = GaussianMixture_to_Covariance_between (me);
			double var_t = SSCP_getTotalVariance (cov_t.get());
			double var_b = SSCP_getTotalVariance (cov_b.get());
			if (var_b >= var_t) { // we have chosen our initial values too far out
				double scale = 0.9 * sqrt (var_t / var_b);
				for (integer im = 1; im <= my numberOfComponents; im ++) {
					Covariance covi = my covariances->at [im];
					for (integer ic = 1; ic <= my dimension; ic ++) {
						covi -> centroid [ic] -= (1.0 - scale) * (covi -> centroid [ic] - cov_b -> centroid [ic]);
					}
				}
				cov_b = GaussianMixture_to_Covariance_between (me);
			}

			// Within variances are now (total - between) / numberOfComponents;

			for (integer ir = 1; ir <= my dimension; ir ++) {
				for (integer ic = ir; ic <= my dimension; ic ++) {
					double scalef = my numberOfComponents == 1 ? 1.0 : (var_b / var_t) / my numberOfComponents;
					cov_t -> data [ic] [ir] = cov_t -> data [ir] [ic] *= scalef;
				}
			}

			// Copy them

			for (integer im = 1; im <= my numberOfComponents; im ++) {
				Covariance cov = my covariances->at [im];
				if (cov -> numberOfRows == 1) {
					for (integer ic = 1; ic <= my dimension; ic ++) {
						cov -> data [1] [ic] = cov_t -> data [ic] [ic];
					}
				} else {
					for (integer ir = 1; ir <= my dimension; ir ++) {
						for (integer ic = ir; ic <= my dimension; ic ++) {
							cov -> data [ir] [ic] = cov -> data [ic] [ir] = cov_t -> data [ir] [ic];
						}
					}
				}
			}
		}
	} catch (MelderError) {
		Melder_throw (me, U" & ", thee, U": no initial guess possible.");
	}
}


autoClassificationTable GaussianMixture_TableOfReal_to_ClassificationTable (GaussianMixture me, TableOfReal thee) {
	try {
		autoClassificationTable him = ClassificationTable_create (thy numberOfRows, my numberOfComponents);
		for (integer im = 1; im <= my numberOfComponents; im ++) {
			Covariance cov = my covariances->at [im];
			SSCP_expandLowerCholesky (cov);
			TableOfReal_setColumnLabel (him.get(), im, Thing_getName (cov));
		}

		double ln2pid = -0.5 * my dimension * log (NUM2pi);
		autoNUMvector<double> lnN (1, my numberOfComponents);
		for (integer i = 1; i <=  thy numberOfRows; i ++) {
			double psum = 0.0;
			for (integer im = 1; im <= my numberOfComponents; im ++) {
				Covariance cov = my covariances->at [im];
				double dsq = NUMmahalanobisDistance (cov -> lowerCholesky.get(), thy data.row(i), cov -> centroid.get());
				lnN [im] = ln2pid - 0.5 * (cov -> lnd + dsq);
				psum += his data [i] [im] = my mixingProbabilities [im] * exp (lnN [im]);
			}
			if (psum == 0) { // p's might be too small (underflow), make the largest equal to sfmin
				double lnmax = -1e308;
				integer imm = 1;
				for (integer im = 1; im <= my numberOfComponents; im ++) {
					if (lnN [im] > lnmax) {
						lnmax = lnN [im];
					} imm = im;
				}
				his data [i] [imm] = NUMfpp -> sfmin;
			}

			// for (integer im = 1; im <= my numberOfComponents; im++) { his data [i] [im] /= psum; }

			TableOfReal_setRowLabel (him.get(), i, thy rowLabels [i].get());
		}
		return him;
	} catch (MelderError) {
		Melder_throw (U"No ClassificationTable created from GaussianMixture & TableOfReal.");
	}
}

void GaussianMixture_TableOfReal_getGammas (GaussianMixture me, TableOfReal thee, double **gamma, double *p_lnp) {
	try {
		for (integer im = 1; im <= my numberOfComponents; im ++) {
			Covariance cov = my covariances->at [im];
			SSCP_expandLowerCholesky (cov);
		}

		double *nk = gamma [thy numberOfRows + 1];
		for (integer im = 1; im <= my numberOfComponents; im ++) {
			nk [im] = 0.0;
		}

		double lnp = 0.0;
		double ln2pid = - 0.5 * my dimension * log (NUM2pi);
		autoVEC lnN = VECraw (my numberOfComponents);
		for (integer i = 1; i <=  thy numberOfRows; i ++) {
			double rowsum = 0.0;
			for (integer im = 1; im <= my numberOfComponents; im ++) {
				Covariance cov = my covariances->at [im];
				double dsq = NUMmahalanobisDistance (cov -> lowerCholesky.get(), thy data.row(i), cov -> centroid.get());
				lnN [im] = ln2pid - 0.5 * (cov -> lnd + dsq);
				gamma [i] [im] = my mixingProbabilities [im] * exp (lnN [im]); // eq. Bishop 9.16
				rowsum += gamma [i] [im];
			}

			// If the gamma [i]'s are too small, their sum will be zero and the scaling will overflow

			if (rowsum == 0.0) {
				continue;    // This is ok because gamma [i]'s will all be zero
			}

			// scale gamma and get log(likehood) (Bishop eq. 9.40)

			for (integer im = 1; im <= my numberOfComponents; im ++) {
				gamma [i] [im] /= rowsum; // eq. Bishop 9.16
				nk [im] += gamma [i] [im]; // eq. Bishop 9.18
				lnp += gamma [i] [im] * (log (my mixingProbabilities [im])  + lnN [im]); // eq. Bishop 9.40
			}
		}
		if (p_lnp) {
			*p_lnp = lnp;
		}
	} catch (MelderError) {
		Melder_throw (me, U" & ", thee, U": no gammas.");
	}
}

void GaussianMixture_splitComponent (GaussianMixture me, integer component) {
	try {
		Melder_require (component > 0 && component <= my numberOfComponents, U"The component should be in [1, ", my numberOfComponents, U"].");
		
		Covariance thee = my covariances->at [component];
		// Always new PCA because we cannot be sure of data unchanged.
		SSCP_expandPCA (thee);
		autoCovariance cov1 = Data_copy (thee);
		autoCovariance cov2 = Data_copy (thee);
		SSCP_unExpandPCA (cov1.get());
		SSCP_unExpandPCA (cov2.get());

		// Eventually cov1 replaces component, cov2 at end

		autoVEC mixingProbabilities = VECraw (my numberOfComponents + 1);
		for (integer i = 1; i <= my numberOfComponents; i ++) {
			mixingProbabilities [i] = my mixingProbabilities [i];
		}
		double gamma = 0.5, lambda = 0.5, eta = 0.5, mu = 0.5;
		mixingProbabilities [component] = gamma * my mixingProbabilities [component];
		mixingProbabilities [my numberOfComponents + 1] = (1.0 - gamma) * my mixingProbabilities [component];
		double mp12 =  mixingProbabilities [component] / mixingProbabilities [my numberOfComponents + 1];
		double factor1 = (eta - eta * lambda * lambda - 1.0) / gamma + 1.0;
		double factor2 = (eta * lambda * lambda - eta - lambda * lambda) / (1.0 - gamma) + 1.0;
		double *ev = thy pca -> eigenvectors [1];
		double d2 = thy pca -> eigenvalues [1];

		for (integer i = 1; i <= my dimension; i ++) {
			cov1 -> centroid [i] -= (1.0 / sqrt (mp12)) * sqrt (d2) * mu * ev [i];
			cov2 -> centroid [i] +=        sqrt (mp12)  * sqrt (d2) * mu * ev [i];

			if (thy numberOfRows == 1) { // diagonal
				cov1 -> data [1] [i] = cov1 -> data [1] [i] / mp12 + factor1 * d2;
				cov1 -> data [1] [i] = cov2 -> data [i] [i] * mp12 + factor2 * d2;
			} else {
				for (integer j = i; j <= my dimension; j++) {
					cov1 -> data [j] [i] = cov1 -> data [i] [j] = cov1 -> data [i] [j] / mp12 + factor1 * d2 * ev [i] * ev [j];
					cov2 -> data [j] [i] = cov2 -> data [i] [j] = cov2 -> data [i] [j] * mp12 + factor2 * d2 * ev [i] * ev [j];
				}
			}
		}
		cov1 -> numberOfObservations *= gamma;
		cov2 -> numberOfObservations *= 1.0 - gamma;

		// Replace cov1 at component + add cov2. If something goes wrong we should be able to restore original!
		try {
			Thing_setName (cov2.get(), Melder_cat (Thing_getName (cov2.get()), U"-", my numberOfComponents + 1));
			my covariances -> addItem_move (cov2.move());
		} catch (MelderError) {
			Melder_throw (me, U" cannot add new component.");
		}

		my covariances -> replaceItem_move (cov1.move(), component);

		my numberOfComponents ++;
		my mixingProbabilities = mixingProbabilities.move();
	} catch (MelderError) {
		Melder_throw (me, U": component ", component, U" cannot be split.");
	}
}

int GaussianMixture_TableOfReal_getProbabilities (GaussianMixture me, TableOfReal thee, integer component, double **p) {
	try {
		double ln2pid = my dimension * log (NUM2pi);

		// Update only one component or all?

		integer icb = 1, ice = my numberOfComponents;
		if (component > 0 && component <= my numberOfComponents) {
			icb = ice = component;
		}

		for (integer ic = icb; ic <= ice; ic ++) {
			Covariance him = my covariances->at [ic];
			SSCP_expandLowerCholesky (him);

			for (integer i = 1; i <= thy numberOfRows; i++) {
				double dsq = NUMmahalanobisDistance (his lowerCholesky.get(), thy data.row(i), his centroid.get());
				double prob = exp (- 0.5 * (ln2pid + his lnd + dsq));
				prob = prob < 1e-300 ? 1e-300 : prob; // prevent p from being zero
				p [i] [ic] = prob;
			}
		}

		GaussianMixture_updateProbabilityMarginals (me, p, thy numberOfRows);
		return 1;
	} catch (MelderError) {
		Melder_throw (me, U" & ", thee, U": no probabilies could be calculated.");
	}
}

void GaussianMixture_expandPCA (GaussianMixture me) {
	for (integer im = 1; im <= my numberOfComponents; im ++) {
		Covariance him = my covariances->at [im];
		Melder_require (his numberOfRows > 1, U"Nothing to expand.");
		his pca = SSCP_to_PCA (him);
	}
}

void GaussianMixture_unExpandPCA (GaussianMixture me) {
	for (integer im = 1; im <= my numberOfComponents; im ++) {
		SSCP_unExpandPCA (my covariances->at [im]);
	}
}

void GaussianMixture_TableOfReal_improveLikelihood (GaussianMixture me, TableOfReal thee, double delta_lnp, integer maxNumberOfIterations, double lambda, int criterion) {
	try {
		conststring32 criterionText = GaussianMixture_criterionText (criterion);

		// The global covariance matrix is added with scaling coefficient lambda during updating the
		// mixture covariances to prevent numerical instabilities.

		autoCovariance covg = TableOfReal_to_Covariance (thee);
		autoNUMmatrix<double> pp (1, thy numberOfRows + 1, 1, my numberOfComponents + 1);
		double *nk = pp [thy numberOfRows + 1]; // last row has the column marginals n(k)
		if (! GaussianMixture_TableOfReal_getProbabilities (me, thee, 0, pp.peek())) {
			Melder_throw (U"Iteration not started. Too many components?");
		}
		double lnp = GaussianMixture_getLikelihoodValue (me, pp.peek(), thy numberOfRows, criterion);
		integer iter = 0;
		autoMelderProgress progress (U"Improve likelihood...");
		try {
			double lnp_prev, lnp_start = lnp / thy numberOfRows;
			do {

				// E-step: get responsabilities (gamma) with current parameters
				// See C. Bishop (2006), Pattern reconition and machine learning, Springer, page 439...

				lnp_prev = lnp;
				iter ++;
				// M-step: 1. new means & covariances

				for (integer im = 1; im <= my numberOfComponents; im ++) {
					GaussianMixture_updateCovariance (me, im, thy data.at, thy numberOfRows, pp.peek());
					GaussianMixture_addCovarianceFraction (me, im, covg.get(), lambda);
				}

				// M-step: 2. new mixingProbabilities

				for (integer im = 1; im <= my numberOfComponents; im ++) {
					my mixingProbabilities [im] = nk [im] / thy numberOfRows;
				}
				if (! GaussianMixture_TableOfReal_getProbabilities (me, thee, 0, pp.peek())) {
					break;
				}
				lnp = GaussianMixture_getLikelihoodValue (me, pp.peek(), thy numberOfRows, criterion);
				Melder_progress ((double) iter / (double) maxNumberOfIterations, criterionText, U": ", lnp / thy numberOfRows, U", L0: ", lnp_start);
			} while (fabs ((lnp - lnp_prev) / lnp_prev) > delta_lnp && iter < maxNumberOfIterations);
		} catch (MelderError) {
			Melder_clearError ();
		}

		// During EM, covariances were underestimated by a factor of (n-1)/n. Correction now.

		for (integer im = 1; im <= my numberOfComponents; im ++) {
			Covariance cov = my covariances->at [im];
			if (cov -> numberOfObservations > 1.5) {
				if (cov -> numberOfRows == 1) {
					for (integer j = 1; j <= thy numberOfColumns; j ++) {
						cov -> data [1] [j] *= cov -> numberOfObservations / (cov -> numberOfObservations - 1);
					}
				} else {
					for (integer j = 1; j <= thy numberOfColumns; j ++)
						for (integer k = j; k <= thy numberOfColumns; k ++) {
							cov -> data [j] [k] = cov -> data [k] [j] *= cov -> numberOfObservations / (cov -> numberOfObservations - 1.0);
						}
				}
			}
		}
	} catch (MelderError) {
		Melder_throw (me, U" & ", thee, U": likelihood cannot be improved.");
	}
}

integer GaussianMixture_getNumberOfParametersInComponent (GaussianMixture me) {
	Melder_assert (my covariances->size > 0);
	Covariance thee = my covariances->at [1];
	// if diagonal) d (means) + d (stdev)
	// else  n + n(n+1)/2
	return thy numberOfRows == 1 ? 2 * thy numberOfColumns : thy numberOfColumns * (thy numberOfColumns + 3) / 2;
}

void GaussianMixture_updateProbabilityMarginals (GaussianMixture me, double **p, integer numberOfRows) {
	integer nocp1 = my numberOfComponents + 1, norp1 = numberOfRows + 1;

	for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
		p [norp1] [ic] = 0.0;
	}
	for (integer i = 1; i <= numberOfRows; i ++) {
		double rowsum = 0.0;
		for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
			rowsum += my mixingProbabilities [ic] * p [i] [ic];
		}
		p [i] [nocp1] = rowsum;
		for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
			p [norp1] [ic] += my mixingProbabilities [ic] * p [i] [ic] / p [i] [nocp1];
		}
	}
}

void GaussianMixture_removeComponent_bookkeeping (GaussianMixture me, integer component, double **p, integer numberOfRows) {
	// p is (NumberOfRows+1) by (numberOfComponents+1) !

	for (integer i = 1; i <= numberOfRows + 1; i ++) {
		for (integer ic = component; ic <= my numberOfComponents; ic ++) {
			p [i] [ic] = p [i] [ic + 1];
		}
	}
	GaussianMixture_updateProbabilityMarginals (me, p, numberOfRows);
	GaussianMixture_removeComponent (me, component);
}

double GaussianMixture_TableOfReal_getLikelihoodValue (GaussianMixture me, TableOfReal thee, int criterion) {
	double value = undefined;
	autoNUMmatrix<double> pp (1, thy numberOfRows + 1, 1, my numberOfComponents + 1);
	if (GaussianMixture_TableOfReal_getProbabilities (me, thee, 0, pp.peek())) {
		value = GaussianMixture_getLikelihoodValue (me, pp.peek(), thy numberOfRows, criterion);
	}
	return value;
}

double GaussianMixture_getLikelihoodValue (GaussianMixture me, double **p, integer numberOfRows, int criterion) {
	// Because we try to _maximize_ a criterion, all criteria are negative numbers.

	if (criterion == GaussianMixture_CD_LIKELIHOOD) {
		double lnpcd = 0.0;
		for (integer i = 1; i <= numberOfRows; i ++) {
			double psum = 0, lnsum = 0;
			for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
				double pp = my mixingProbabilities [ic] * p [i] [ic];
				psum += pp;
				lnsum += pp * log (pp);
			}
			if (psum > 0) {
				lnpcd += lnsum / psum;
			}
		}
		return lnpcd;
	}

	// The common factor for all other criteria is the log(likelihood)

	double lnp = 0.0;
	for (integer i = 1; i <= numberOfRows; i ++) {
		double psum = 0.0;
		for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
			psum += my mixingProbabilities [ic] * p [i] [ic];
		}
		if (psum > 0.0) {
			lnp += log (psum);
		}
	}

	if (criterion == GaussianMixture_LIKELIHOOD) {
		return lnp;
	}

	double npars = GaussianMixture_getNumberOfParametersInComponent (me), np = npars * my numberOfComponents;
	if (criterion == GaussianMixture_MML) {
		/* Equation (15) in
			Mario A.T. Figueiredo, and Anil K. Jain, Unsupervised Learning of Finite Mixture Models :
			IEEE TRANSACTIONS ON PATTERN ANALYSIS AND MACHINE INTELLIGENCE, VOL. 24, NO. 3, MARCH 2002

			L(theta,Y)= N/2*sum(m=1..k, log(n*alpha [k]/12)) +k/2*ln(n/12) +k(N+1)/2
				- log (sum(i=1..n, sum(m=1..k, alpha [k]*p(k))))
		*/
		double logmpn = 0.0;
		for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
			logmpn += log (my mixingProbabilities [ic]);
		}

		// a rewritten L(theta,Y) is

		return lnp - 0.5 * my numberOfComponents * (npars + 1) * (log (numberOfRows / 12.0) + 1.0)
		       + 0.5 * npars * logmpn;
	} else if (criterion == GaussianMixture_BIC) {
		return 2.0 * lnp - np * log (numberOfRows);
	} else if (criterion == GaussianMixture_AIC) {
		return 2.0 * (lnp - np);
	} else if (criterion == GaussianMixture_AICC) {
		np = npars * my numberOfComponents;
		return 2.0 * (lnp - np * (numberOfRows / (numberOfRows - np - 1.0)));
	}
	return lnp;
}

autoGaussianMixture GaussianMixture_TableOfReal_to_GaussianMixture_CEMM (GaussianMixture gm, TableOfReal thee, integer minNumberOfComponents, double delta_l, integer maxNumberOfIterations, double lambda, int criterion) {
	try {
		conststring32 criterionText = GaussianMixture_criterionText (criterion);
		bool deleteWeakComponents = ( minNumberOfComponents > 0 );
		autoGaussianMixture me = Data_copy (gm);
		autoNUMmatrix<double> p (1, thy numberOfRows + 2, 1, my numberOfComponents + 1);

		double *gsum = p [thy numberOfRows + 1]; // convenience array with sums

		autoCovariance covg = TableOfReal_to_Covariance (thee);

		double npars = GaussianMixture_getNumberOfParametersInComponent (me.get());
		double nparsd2 = ( deleteWeakComponents ? npars / 2.0 : 0.0 );

		// Initial E-step: Update all p's.

		GaussianMixture_TableOfReal_getProbabilities (me.get(), thee, 0, p.peek());

		double lnew = GaussianMixture_getLikelihoodValue (me.get(), p.peek(), thy numberOfRows, criterion);

		autoMelderProgress progress (U"Gaussian mixture...");
		autoGaussianMixture best;
		try {
			double lstart = lnew / thy numberOfRows;
			integer iter = 0, component;
			double lmax = -1e308, lprev;
			while (my numberOfComponents >= minNumberOfComponents) {
				do {
					iter ++;
					component = 1;
					lprev = lnew;
					while (component <= my numberOfComponents) {
						// M-step for means and covariances
						GaussianMixture_updateProbabilityMarginals (me.get(), p.peek(), thy numberOfRows);
						GaussianMixture_updateCovariance (me.get(), component, thy data.at, thy numberOfRows, p.peek());
						if (lambda > 0) {
							GaussianMixture_addCovarianceFraction (me.get(), component, covg.get(), lambda);
						}

						// Now check if enough support for a component exists

						double support_im = gsum [component] - nparsd2, support = 0.0;
						for (integer ic = 1; ic <= my numberOfComponents; ic ++) {
							double support_ic = gsum [ic] - nparsd2;
							if (support_ic > 0.0) {
								support += support_ic;
							}
						}
						my mixingProbabilities [component] = ( support_im > 0.0 ? support_im : 0.0 );
						if (support > 0.0) {
							my mixingProbabilities [component] /= support;
						}

						NUMdvector_scaleAsProbabilities (my mixingProbabilities.at, my numberOfComponents);

						if (my mixingProbabilities [component] > 0.0) { // update p for component
							GaussianMixture_TableOfReal_getProbabilities (me.get(), thee, component, p.peek());
							component ++;
						} else {
							// "Remove" the component column from p by shifting row values

							GaussianMixture_removeComponent_bookkeeping (me.get(), component, p.peek(), thy numberOfRows);

							// Now numberOfComponents is one less!
							// MelderInfo_writeLine (U"Removed component ", component);
						}
					}

					// L(theta,Y)=N/2 sum(m=1..k, log(n*mixingP [m]/12))+k/2log(n/12)+k/2(N+1)-loglikelihood reduces to:
					// k/2 (N+1){log(n/12)+1}+N/2sum(m=1..k,mixingP [m]) - loglikelihood

					lnew = GaussianMixture_getLikelihoodValue (me.get(), p.peek(), thy numberOfRows, criterion);

					Melder_progress ((double) iter / (double) maxNumberOfIterations, U", ", criterionText, U": ",
						lnew / thy numberOfRows, U"\nComponents: ", my numberOfComponents, U"\nL0: ", lstart);
				} while (lnew > lprev && fabs ((lprev - lnew) / lnew) > delta_l && iter < maxNumberOfIterations);
				if (lnew > lmax) {
					best = Data_copy (me.get());
					lmax = lnew;
					if (! deleteWeakComponents) {
						break;    // TODO was got end; is dat hetzelfde?
					}
				}
				if (my numberOfComponents > 1) { // remove smallest component
					component = 1;
					double mpmin = my mixingProbabilities [component];
					for (integer ic = 2; ic <= my numberOfComponents; ic ++) {
						if (my mixingProbabilities [ic] < mpmin) {
							mpmin = my mixingProbabilities [ic];
							component = ic;
						}
					}

					GaussianMixture_removeComponent_bookkeeping (me.get(), component, p.peek(), thy numberOfRows);
				} else {
					break;
				}
			}
		} catch (MelderError) {
			Melder_clearError ();
		}
		return best;
	} catch (MelderError) {
		Melder_throw (U"GaussianMixture not improved.");
	}
}

// The numberOfElemnts per covariance needs to be updated later
void GaussianMixture_removeComponent (GaussianMixture me, integer component) {
	if (component < 1 || component > my numberOfComponents || my numberOfComponents == 1) {
		return;
	}

	my covariances -> removeItem (component);
	my numberOfComponents --;

	for (integer ic = component; ic <= my numberOfComponents; ic ++) {
		my mixingProbabilities [ic] = my mixingProbabilities [ic + 1];
	}

	NUMdvector_scaleAsProbabilities (my mixingProbabilities.at, my numberOfComponents);
}

autoGaussianMixture TableOfReal_to_GaussianMixture (TableOfReal me, integer numberOfComponents, double delta_lnp, integer maxNumberOfIterations, double lambda, int storage, int criterion) {
	try {
		Melder_require (my numberOfRows >= 2 * numberOfComponents,
			U"The number of data points should at least be twice the number of components.");
		autoGaussianMixture thee = GaussianMixture_create (numberOfComponents, my numberOfColumns, storage);
		GaussianMixture_setLabelsFromTableOfReal (thee.get(), me);
		GaussianMixture_initialGuess (thee.get(), me, 1.0, 0.05);
		if (maxNumberOfIterations <= 0)
			return thee;
		GaussianMixture_TableOfReal_improveLikelihood (thee.get(), me, delta_lnp, maxNumberOfIterations, lambda, criterion);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no GaussianMixture created.");
	}
}

autoCorrelation GaussianMixture_TableOfReal_to_Correlation (GaussianMixture me, TableOfReal thee) {
	try {
		Melder_require (my dimension == thy numberOfColumns,
			U"Dimensions should be equal.");
		autoClassificationTable ct = GaussianMixture_TableOfReal_to_ClassificationTable (me, thee);
		autoCorrelation him = ClassificationTable_to_Correlation_columns (ct.get());
		return him;
	} catch (MelderError) {
		Melder_throw (U"Correlation not created from GaussianMixture & TableOfReal.");
	}
}

double GaussianMixture_getProbabilityAtPosition_string (GaussianMixture me, conststring32 vector_string) {
	autostring32vector vector = STRVECtokenize (vector_string);
	autoVEC pos = {my dimension, kTensorInitializationType::ZERO};
	for (integer i = 1; i <= vector.size; i ++) {
		pos [i] = Melder_atof (vector [i].get());
		if (i == my dimension)
			break;
	}
	double p = GaussianMixture_getProbabilityAtPosition (me, pos.get());
	return p;
}

double GaussianMixture_getMarginalProbabilityAtPosition (GaussianMixture me, constVEC pos, double x) {
	longdouble p = 0.0;
	for (integer im = 1; im <= my numberOfComponents; im ++) {
		double pim = Covariance_getMarginalProbabilityAtPosition (my covariances->at [im], pos, x);
		p += my mixingProbabilities [im] * pim;
	}
	return (double) p;
}

double GaussianMixture_getProbabilityAtPosition (GaussianMixture me, constVEC xpos) {
	longdouble p = 0.0;
	for (integer im = 1; im <= my numberOfComponents; im ++) {
		double pim = Covariance_getProbabilityAtPosition (my covariances->at [im], xpos);
		p += my mixingProbabilities [im] * pim;
	}
	return (double) p;
}

autoMatrix GaussianMixture_PCA_to_Matrix_density (GaussianMixture me, PCA thee, integer d1, integer d2, double xmin, double xmax, integer nx, double ymin, double ymax, integer ny) {
	try {
		Melder_require (my dimension == thy dimension, U"Dimensions should be equal.");
		Melder_require (d1 <= thy numberOfEigenvalues && d2 <= thy numberOfEigenvalues, U"Direction index too high.");
		
		autoVEC v = { my dimension, kTensorInitializationType::ZERO };
		if (xmax == xmin || ymax == ymin) {
			double xmind, xmaxd, ymind, ymaxd, nsigmas = 2.0;

			GaussianMixture_PCA_getIntervalsAlongDirections (me, thee, d1, d2, nsigmas, &xmind, &xmaxd, &ymind, &ymaxd);
			if (xmax == xmin) {
				xmin = xmind;
				xmax = xmaxd;
			}
			if (ymax == ymin) {
				ymin = ymind;
				ymax = ymaxd;
			}
		}

		// xmin,xmax and ymin,ymax are coordinates in the pc1 vs pc2 plane

		double dx = fabs (xmax - xmin) / nx, dy = fabs (ymax - ymin) / ny;
		double x1 = xmin + 0.5 * dx, y1 = ymin + 0.5 * dy;
		autoMatrix him = Matrix_create (xmin, xmax, nx, dx, x1, ymin, ymax, ny, dy, y1);

		for (integer i = 1; i <= ny; i ++) {
			double y = y1 + (i - 1) * dy;
			for (integer j = 1; j <= nx; j ++) {
				double x = x1 + (j - 1) * dx;
				for (integer k = 1; k <= my dimension; k ++) {
					v [k] = x * thy eigenvectors [d1] [k] + y * thy eigenvectors [d2] [k];
				}
				his z [i] [j] = GaussianMixture_getProbabilityAtPosition (me, v.get());
			}
		}
		return him;
	} catch (MelderError) {
		Melder_throw (me, U" & ", thee, U": no Matrix density created.");
	}
}

autoTableOfReal GaussianMixture_to_TableOfReal_randomSampling (GaussianMixture me, integer numberOfPoints) {
	try {
		Covariance cov = my covariances->at [1];
		autoTableOfReal thee = TableOfReal_create (numberOfPoints, my dimension);
		autoVEC buf (my dimension, kTensorInitializationType::RAW);
		thy columnLabels. copyElementsFrom_upTo (cov -> columnLabels.get(), my dimension);
			// ppgb FIXME: is the number of column labels in the covariance equal to the number of dimensions? If so, document or assert.
		for (integer i = 1; i <= numberOfPoints; i ++) {
			char32 *covname;
			GaussianMixture_generateOneVector_inline (me, thy data.row (i), & covname, buf.get());
			TableOfReal_setRowLabel (thee.get(), i, covname);
		}
		GaussianMixture_unExpandPCA (me);
		return thee;
	} catch (MelderError) {
		GaussianMixture_unExpandPCA (me); 
		Melder_throw (U"TableOfReal with random sampling not created.");
	}
}

autoTableOfReal GaussianMixture_TableOfReal_to_TableOfReal_BHEPNormalityTests (GaussianMixture me, TableOfReal thee, double h) {
	try {
		integer n = thy numberOfRows, d = thy numberOfColumns, nocp1 = my numberOfComponents + 1;
		
		Melder_require (d == my dimension, U"Dimensions should agree.");
		
		// We cannot use a classification table because this could weigh a far-off data point with high probability

		autoNUMmatrix<double> p (1, thy numberOfRows + 1, 1, my numberOfComponents + 1);

		GaussianMixture_TableOfReal_getProbabilities (me, thee, 0, p.peek());

		// prob, beta, tnbo, lnmu, lnvar, ndata, ncol
		autoTableOfReal him = TableOfReal_create (my numberOfComponents, 7);

		// labels

		integer iprob = 1, ih = 2, itnb = 3, ilnmu = 4, ilnvar = 5, indata = 6, id = 7;
		conststring32 label [8] = { U"", U"p", U"h", U"tnb", U"lnmu", U"lnvar", U"ndata", U"d" };
		for (integer icol = 1; icol <= 7; icol ++) {
			TableOfReal_setColumnLabel (him.get(), icol, label [icol]);
		}
		for (integer irow = 1; irow <= my numberOfComponents; irow ++) {
			Covariance cov = my covariances->at [irow];
			TableOfReal_setRowLabel (him.get(), irow, Thing_getName (cov));
		}

		for (integer icol = 1 ; icol <= my numberOfComponents; icol ++) {
			his data [icol] [indata] = p [n + 1] [icol];
		}

		for (integer im = 1; im <= my numberOfComponents; im ++) {
			Covariance cov = my covariances->at [im];
			double mixingP = my mixingProbabilities [im];
			double nd = his data [im] [indata], d2 = d / 2.0;
			double beta = h > 0.0 ? NUMsqrt1_2 / h : NUMsqrt1_2 * pow ( (1.0 + 2.0 * d) / 4.0, 1.0 / (d + 4.0)) * pow (nd, 1.0 / (d + 4.0));
			double beta2 = beta * beta, beta4 = beta2 * beta2, beta8 = beta4 * beta4;
			double gamma = 1.0 + 2.0 * beta2, gamma2 = gamma * gamma, gamma4 = gamma2 * gamma2;
			double delta = 1.0 + beta2 * (4.0 + 3.0 * beta2), delta2 = delta * delta;
			double mu = 1.0 - pow (gamma, -d2) * (1.0 + d * beta2 / gamma + d * (d + 2.0) * beta4 / (2.0 * gamma2));
			double var = 2.0 * pow (1.0 + 4.0 * beta2, -d2)
			             + 2.0 * pow (gamma,  -d) * (1.0 + 2.0 * d * beta4 / gamma2  + 3.0 * d * (d + 2.0) * beta8 / (4.0 * gamma4))
			             - 4.0 * pow (delta, -d2) * (1.0 + 3.0 * d * beta4 / (2.0 * delta) + d * (d + 2.0) * beta8 / (2.0 * delta2));
			double mu2 = mu * mu;

			double prob = undefined, tnb = undefined, lnmu = undefined, lnvar = undefined;

			try {
				SSCP_expandLowerCholesky (cov);
			} catch (MelderError) {
				tnb = 4.0 * nd;
			}

			double djk, djj, sumjk = 0.0, sumj = 0.0;
			double b1 = beta2 / 2.0, b2 = b1 / (1.0 + beta2);

			/* Heinze & Wagner (1997), page 3
				We use d [j] [k] = ||Y [j]-Y [k]||^2 = (Y [j]-Y [k])'S^(-1)(Y [j]-Y [k])
				So d [j] [k]= d [k] [j] and d [j] [j] = 0
			*/
			for (integer j = 1; j <= n; j ++) {
				double wj = p [j] [nocp1] > 0.0 ? mixingP * p [j] [im] / p [j] [nocp1] : 0.0;
				for (integer k = 1; k < j; k ++) {
					djk = NUMmahalanobisDistance_chi (cov -> lowerCholesky.at, thy data [j], thy data [k], d, d);
					double w = p [k] [nocp1] > 0.0 ? wj * mixingP * p [k] [im] / p [k] [nocp1] : 0.0;
					sumjk += 2.0 * w * exp (-b1 * djk); // factor 2 because d [j] [k] == d [k] [j]
				}
				sumjk += wj * wj; // for k == j. Is this ok now for probability weighing ????
				djj = NUMmahalanobisDistance (cov -> lowerCholesky.get(), thy data.row(j), cov -> centroid.get());
				sumj += wj * exp (-b2 * djj);
			}
			tnb = (1.0 / nd) * sumjk - 2.0 * pow (1.0 + beta2, - d2) * sumj + nd * pow (gamma, - d2); // n *

			his data [im] [ilnmu] = lnmu = 0.5 * log (mu2 * mu2 / (mu2 + var)); //log (sqrt (mu2 * mu2 /(mu2 + var)));
			his data [im] [ilnvar] = lnvar = sqrt (log ( (mu2 + var) / mu2));
			his data [im] [iprob] = prob = NUMlogNormalQ (tnb, lnmu, lnvar);
			his data [im] [ih] = NUMsqrt1_2 / beta;
			his data [im] [id] = d;
			his data [im] [itnb] = tnb;
		}
		return him;
	} catch (MelderError) {
		Melder_throw (U"TableOfReal for BHEP not created.");
	}
}

/* End of file GaussianMixture.cpp 1555*/