emily
/
praat


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
							/* Sound_to_Formant.cpp
 *
 * Copyright (C) 1992-2011,2014,2015,2016 Paul Boersma
 *
 * This code is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This code is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this work. If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * pb 2002/07/16 GPL
 * pb 2003/05/15 replaced memcof with NUMburg
 * pb 2003/09/18 default time step is 4 times oversampling
 * pb 2006/05/10 better handling of interruption in Sound_to_Formant
 * pb 2006/05/10 better handling of NULL from Polynomial_to_Roots
 * pb 2007/01/26 made compatible with stereo Sounds
 * pb 2007/03/30 changed float to double (against compiler warnings)
 * pb 2010/12/13 removed some style bugs
 * pb 2011/06/08 C++
 */

#include "Sound_to_Formant.h"
#include "NUM2.h"
#include "Polynomial.h"

static void burg (constVEC samples, VEC coefficients,
	Formant_Frame frame, double nyquistFrequency, double safetyMargin)
{
	double a0 =	NUMburg_preallocated (coefficients, samples);
	/*
		Convert LP coefficients to polynomial.
	 */
	autoPolynomial polynomial = Polynomial_create (-1, 1, coefficients.size);
	for (int i = 1; i <= coefficients.size; i ++)
		polynomial -> coefficients [i] = - coefficients [coefficients.size - i + 1];
	polynomial -> coefficients [coefficients.size + 1] = 1.0;

	/*
		Find the roots of the polynomial.
	 */
	autoRoots roots = Polynomial_to_Roots (polynomial.get());
	Roots_fixIntoUnitCircle (roots.get());

	Melder_assert (frame -> nFormants == 0 && ! frame -> formant);

	/*
		First pass: count the formants.
		The roots come in conjugate pairs, so we need only count those above the real axis.
	 */
	for (int i = roots -> min; i <= roots -> max; i ++)
		if (roots -> v [i]. im >= 0) {
			double f = fabs (atan2 (roots -> v [i].im, roots -> v [i].re)) * nyquistFrequency / NUMpi;
			if (f >= safetyMargin && f <= nyquistFrequency - safetyMargin)
				frame -> nFormants ++;
		}

	/*
		Create space for formant data.
	 */
	if (frame -> nFormants > 0)
		frame -> formant = NUMvector <structFormant_Formant> (1, frame -> nFormants);

	/*
		Second pass: fill in the formants.
	 */
	int iformant = 0;
	for (int i = roots -> min; i <= roots -> max; i ++) if (roots -> v [i]. im >= 0.0) {
		double f = fabs (atan2 (roots -> v [i].im, roots -> v [i].re)) * nyquistFrequency / NUMpi;
		if (f >= safetyMargin && f <= nyquistFrequency - safetyMargin) {
			Formant_Formant formant = & frame -> formant [++ iformant];
			formant -> frequency = f;
			formant -> bandwidth = -
				log (roots -> v [i].re * roots -> v [i].re + roots -> v [i].im * roots -> v [i].im) * nyquistFrequency / NUMpi;
		}
	}
	Melder_assert (iformant == frame -> nFormants);   // may fail if some frequency is NaN
}

static int findOneZero (int ijt, double vcx [], double a, double b, double *zero) {
	double x = 0.5 * (a + b), fa = 0.0, fb = 0.0, fx = 0.0;
	integer k;
	for (k = ijt; k >= 0; k --) {
		fa = vcx [k] + a * fa;
		fb = vcx [k] + b * fb;
	}
	if (fa * fb >= 0.0) {   // there should be a zero between a and b
		Melder_casual (
			U"There is no zero between ,", Melder_single (a),
			U" and ", Melder_single (b),
			U".\n    The function values are ", Melder_single (fa),
			U" and ", Melder_single (fb),
			U", respectively."
		);
		return 0;
	}
	do {
		fx = 0.0;
		/*x = fa == fb ? 0.5 * (a + b) : a + fa * (a - b) / (fb - fa);*/
		x = 0.5 * (a + b);   // simple bisection
		for (k = ijt; k >= 0; k --) fx = vcx [k] + x * fx;
		if (fa * fx > 0.0) { a = x; fa = fx; } else { b = x; fb = fx; }
	} while (fabs (fx) > 1e-5);
	*zero = x;
	return 1;   // OK
}

static int findNewZeroes (int ijt, double ppORIG [], int degree,
	double zeroes [])   // In / out
{
	static double cosa [7] [7] = {
		{  1,   0,   0,   0,   0,   0,   0 },
		{  0,   2,   0,   0,   0,   0,   0 },
		{ -2,   0,   4,   0,   0,   0,   0 },
		{  0,  -6,   0,   8,   0,   0,   0 },
		{  2,   0, -16,   0,  16,   0,   0 },
		{  0,  10,   0, -40,   0,  32,   0 },
		{ -2,   0,  36,   0, -96,   0,  64 } };
	double pp [33], newZeroes [33], px [33];
	int pt, vt, i, half_degree = (degree + 1) / 2;
	for (vt = 0; vt <= half_degree; vt ++) pp [vt] = ppORIG [vt];
	if (! (degree & 1))
		for (vt = 1; vt <= half_degree; vt ++) pp [vt] -= pp [vt - 1];
	for (i = 0; i <= half_degree; i ++)
		px [i] = cosa [half_degree] [i];
	for (pt = half_degree - 1; pt >= 0; pt --)
		for (vt = 0; vt <= half_degree; vt ++)
			px [vt] += pp [half_degree - pt] * cosa [pt] [vt];
	/* Fill an array with the new zeroes, which lie between the old zeroes. */
	newZeroes [0] = 1.0;
	for (i = 1; i <= half_degree; i ++) {
		if (! findOneZero (ijt, px, zeroes [i - 1], zeroes [i], & newZeroes [i])) {
			Melder_casual (
				U"Degree ", degree,
				U" not completed."
			);
			return 0;
		}
	}
	newZeroes [half_degree + 1] = -1.0;
	/* Grow older. */
	for (i = 0; i <= half_degree + 1; i ++)
		zeroes [i] = newZeroes [i];
	return 1;
}

static int splitLevinson (
	constVEC samples,   // the windowed signal xw [1..nx]
	int ncof,   // the coefficients cof [1..ncof]
	Formant_Frame frame, double nyquistFrequency)   // put the results here
{
	int result = 1;
	double rx [100], zeroes [33];
	for (int i = 0; i <= 32; i ++)
		zeroes [i] = 0.0;
	/* Compute the autocorrelation of the windowed signal. */
	for (int i = 0; i < ncof; i ++) {
		rx [i] = 0.0;
		for (int j = 1; j <= samples.size - i; j ++)
			rx [i] += samples [j] * samples [j + i];
	}
	/* Normalize autocorrelation; (should we also divide by the autocorrelation of the window?). */
	for (int i = 1; i < ncof; i ++)
		rx [i] /= rx [0]; rx [0] = 1.0;

	/* Compute zeroes. */
	{
		double tau = 0.5 * rx [0];
		double pnu [33], pk [33], pkz [33];
		for (int i = 0; i <= 32; i ++)
			pkz [i] = pk [i] = 0.0;
		pkz [0] = 1.0; pk [0] = 1.0; pk [1] = 1.0;
		for (int degree = 1; degree < ncof; degree ++) {
			int t = degree / 2;
			double tauk = rx [0] + rx [degree], alfak;
			for (int it = 1; it <= t; it ++)
				tauk += pk [it] * ( 2 * it == degree ? rx [it] : rx [it] + rx [degree - it] );
			alfak = tauk / tau;
			tau = tauk;
			pnu [0] = 1.0;
			int ijt = (degree + 1) / 2;
			for (int it = ijt; it > 0; it --)
				pnu [it] = pk [it] + pk [it - 1] - alfak * pkz [it - 1];
			if (2 * ijt == degree) pnu [ijt + 1] = pnu [ijt];
			if (degree == 1) {
				(void) 0;
			} else if (degree == 2) {
				zeroes [0] = 1.0;   // starting values
				zeroes [1] = 0.5 - 0.5 * pnu [1];
				zeroes [2] = -1.0;
			}
			else
				if (! findNewZeroes (ijt, pnu, degree, zeroes)) {
					result = 0;
					goto loopEnd;
				}
			/* Grow older. */
			for (int i = 0; i <= 32; i ++) {
				pkz [i] = pk [i];
				pk [i] = pnu [i];
			}
		}
	}
loopEnd:
	/* First pass: count the poles. */
	for (int i = 1; i <= ncof / 2; i ++) {
		if (zeroes [i] == 0.0 || zeroes [i] == -1.0) break;
		frame -> nFormants ++;
	}

	/* Create space for formant data. */
	if (frame -> nFormants > 0)
	    frame -> formant = NUMvector <structFormant_Formant> (1, frame -> nFormants);

	/* Second pass: fill in the poles. */
	int iformant = 0;
	for (int i = 1; i <= ncof / 2; i ++) {
		Formant_Formant formant = & frame -> formant [++ iformant];
		if (zeroes [i] == 0.0 || zeroes [i] == -1.0) break;
		formant -> frequency =  acos (zeroes [i]) * nyquistFrequency / NUMpi;
		formant -> bandwidth = 50.0;
	}

	return result;
}

static void Sound_preEmphasis (Sound me, double preEmphasisFrequency) {
	double preEmphasis = exp (-2.0 * NUMpi * preEmphasisFrequency * my dx);
	for (integer channel = 1; channel <= my ny; channel ++) {
		double *s = my z [channel]; 
		for (integer i = my nx; i >= 2; i --) s [i] -= preEmphasis * s [i - 1];
	}
}

void Formant_sort (Formant me) {
	for (integer iframe = 1; iframe <= my nx; iframe ++) {
		Formant_Frame frame = & my d_frames [iframe];
		integer n = frame -> nFormants;
		for (integer i = 1; i < n; i ++) {
			double min = frame -> formant [i]. frequency;
			integer imin = i;
			for (integer j = i + 1; j <= n; j ++)
				if (frame -> formant [j]. frequency < min) {
					min = frame -> formant [j]. frequency;
					imin = j;
				}
			if (imin != i) {
				double min_bandwidth = frame -> formant [imin]. bandwidth;
				frame -> formant [imin]. frequency = frame -> formant [i]. frequency;
				frame -> formant [imin]. bandwidth = frame -> formant [i]. bandwidth;
				frame -> formant [i]. frequency = min;
				frame -> formant [i]. bandwidth = min_bandwidth;
			}
		}
	}
}

static autoFormant Sound_to_Formant_any_inplace (Sound me, double dt_in, int numberOfPoles,
	double halfdt_window, int which, double preemphasisFrequency, double safetyMargin)
{
	double dt = dt_in > 0.0 ? dt_in : halfdt_window / 4.0;
	double physicalDuration = my nx * my dx, t1;
	double dt_window = 2.0 * halfdt_window;
	integer nFrames = 1 + Melder_ifloor ((physicalDuration - dt_window) / dt);
	integer nsamp_window = Melder_ifloor (dt_window / my dx), halfnsamp_window = nsamp_window / 2;

	if (nsamp_window < numberOfPoles + 1)
		Melder_throw (U"Window too short.");
	t1 = my x1 + 0.5 * (physicalDuration - my dx - (nFrames - 1) * dt);   // centre of first frame
	if (nFrames < 1) {
		nFrames = 1;
		t1 = my x1 + 0.5 * physicalDuration;
		dt_window = physicalDuration;
		nsamp_window = my nx;
	}
	autoFormant thee = Formant_create (my xmin, my xmax, nFrames, dt, t1, (numberOfPoles + 1) / 2);   // e.g. 11 poles -> maximally 6 formants

	autoMelderProgress progress (U"Formant analysis...");

	/* Pre-emphasis. */
	Sound_preEmphasis (me, preemphasisFrequency);

	/* Gaussian window. */
	auto window = VECraw (nsamp_window);
	for (integer i = 1; i <= nsamp_window; i ++) {
		double imid = 0.5 * (nsamp_window + 1), edge = exp (-12.0);
		window [i] = (exp (-48.0 * (i - imid) * (i - imid) / (nsamp_window + 1) / (nsamp_window + 1)) - edge) / (1.0 - edge);
	}

	integer maximumFrameLength = nsamp_window;
	auto frameBuffer = VECraw (maximumFrameLength);
	auto coefficients = VECraw (numberOfPoles);   // superfluous if which==2, but nobody uses that anyway
	for (integer iframe = 1; iframe <= nFrames; iframe ++) {
		double t = Sampled_indexToX (thee.get(), iframe);
		integer leftSample = Sampled_xToLowIndex (me, t);
		integer rightSample = leftSample + 1;
		integer startSample = rightSample - halfnsamp_window;
		integer endSample = leftSample + halfnsamp_window;
		double maximumIntensity = 0.0;
		if (startSample < 1) startSample = 1;   // this should not be more than a rounding problem
		if (endSample > my nx) endSample = my nx;   // this should not be more than a rounding problem
		for (integer i = startSample; i <= endSample; i ++) {
			double value = Sampled_getValueAtSample (me, i, Sound_LEVEL_MONO, 0);
			if (value * value > maximumIntensity)
				maximumIntensity = value * value;
		}
		thy d_frames [iframe]. intensity = maximumIntensity;
		if (maximumIntensity == 0.0) continue;   // Burg cannot stand all zeroes

		/* Copy a pre-emphasized window to a frame. */
		const integer actualFrameLength = endSample - startSample + 1;   // should rarely be less than nsamp_window
		VEC frame = frameBuffer.subview (1, actualFrameLength);
		const integer offset = startSample - 1;
		for (integer isamp = 1; isamp <= actualFrameLength; isamp ++)
			frame [isamp] = Sampled_getValueAtSample (me, offset + isamp, Sound_LEVEL_MONO, 0) * window [isamp];

		if (which == 1) {
			burg (frame, coefficients.get(), & thy d_frames [iframe], 0.5 / my dx, safetyMargin);
		} else if (which == 2) {
			if (! splitLevinson (frame, numberOfPoles, & thy d_frames [iframe], 0.5 / my dx)) {
				Melder_clearError ();
				Melder_casual (U"(Sound_to_Formant:)"
					U" Analysis results of frame ", iframe,
					U" will be wrong."
				);
			}
		}
		Melder_progress ((double) iframe / (double) nFrames, U"Formant analysis: frame ", iframe);
	}
	Formant_sort (thee.get());
	return thee;
}

autoFormant Sound_to_Formant_any (Sound me, double dt, int numberOfPoles, double maximumFrequency,
	double halfdt_window, int which, double preemphasisFrequency, double safetyMargin)
{
	double nyquist = 0.5 / my dx;
	autoSound sound;
	if (maximumFrequency <= 0.0 || fabs (maximumFrequency / nyquist - 1) < 1.0e-12) {
		sound = Data_copy (me);   // will be modified
	} else {
		sound = Sound_resample (me, maximumFrequency * 2, 50);
	}
	return Sound_to_Formant_any_inplace (sound.get(), dt, numberOfPoles, halfdt_window, which, preemphasisFrequency, safetyMargin);
}

autoFormant Sound_to_Formant_burg (Sound me, double dt, double nFormants, double maximumFrequency, double halfdt_window, double preemphasisFrequency) {
	try {
		return Sound_to_Formant_any (me, dt, (int) (2 * nFormants), maximumFrequency, halfdt_window, 1, preemphasisFrequency, 50.0);
	} catch (MelderError) {
		Melder_throw (me, U": formant analysis (Burg) not performed.");
	}
}

autoFormant Sound_to_Formant_keepAll (Sound me, double dt, double nFormants, double maximumFrequency, double halfdt_window, double preemphasisFrequency) {
	try {
		return Sound_to_Formant_any (me, dt, (int) (2 * nFormants), maximumFrequency, halfdt_window, 1, preemphasisFrequency, 0.0);
	} catch (MelderError) {
		Melder_throw (me, U": formant analysis (keep all) not performed.");
	}
}

autoFormant Sound_to_Formant_willems (Sound me, double dt, double nFormants, double maximumFrequency, double halfdt_window, double preemphasisFrequency) {
	try {
		return Sound_to_Formant_any (me, dt, (int) (2 * nFormants), maximumFrequency, halfdt_window, 2, preemphasisFrequency, 50.0);
	} catch (MelderError) {
		Melder_throw (me, U": formant analysis (Burg) not performed.");
	}
}

/* End of file Sound_to_Formant.cpp */