emily
/
praat


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
							/* Sound_and_Spectrogram_extensions.cpp
 *
 * Copyright (C) 1993-2017 David Weenink
 *
 * This code is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This code is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this work. If not, see <http://www.gnu.org/licenses/>.
 */

/*
 djmw 20010718
 djmw 20020813 GPL header.
 djmw 20041124 Changed call to Sound_to_Spectrum.
 djmw 20070103 Sound interface changes
 djmw 20071107 Errors/warnings text changes
 djmw 20071202 Melder_warning<n>
*/

#include "Sound_and_Spectrogram_extensions.h"
#include "Sound_extensions.h"
#include "Sound_and_Spectrum.h"
#include "Sound_to_Pitch.h"
#include "Vector.h"
#include "NUM2.h"

autoSound BandFilterSpectrogram_as_Sound (BandFilterSpectrogram me, int to_dB);

/*
	The gaussian(x) = (exp(-48*((i-(n+1)/2)/(n+1))^2)-exp(-12))/(1-exp(-12));
	For power we need the area under the square of this window:
	Integrate (gaussian(i)^2,i=1..n) =
		(sqrt(Pi)*sqrt(3)*sqrt(2)*erf(2*(n-1)*sqrt(3)*sqrt(2)/(n+1))*(n+1) + 24*exp(-24)*(n-1)+
		-4*sqrt(Pi)*sqrt(3)*exp(-12)*erf(2*(n-1)*sqrt(3)/(n+1))*(n+1))/ (24 * (-1+exp(-12))^2),
	where erf(x) = 1 - erfc(x) and n is the windowLength in samples.
	To compare with the rectangular window we need to divide this by the window width (n -1) x 1^2.
*/
static void _Spectrogram_windowCorrection (Spectrogram me, integer numberOfSamples_window) {
	double windowFactor = 1.0;
	if (numberOfSamples_window > 1) {
		double e12 = exp (-12);
		double denum = (e12 - 1) * (e12 - 1.0) * 24 * (numberOfSamples_window - 1);
		double arg1 = 2.0 * NUMsqrt3 * (numberOfSamples_window - 1) / (numberOfSamples_window + 1);
		double arg2 = arg1 * NUMsqrt2;
		double p2 = NUMsqrtpi * NUMsqrt3 * NUMsqrt2 * (1 - NUMerfcc (arg2)) * (numberOfSamples_window + 1);
		double p1 = 4 * NUMsqrtpi * NUMsqrt3 * e12 * (1 - NUMerfcc (arg1)) * (numberOfSamples_window + 1);
		windowFactor =  (p2 - p1 + 24 * (numberOfSamples_window - 1) * e12 * e12) / denum;
	}
	for (integer i = 1; i <= my ny; i ++) {
		for (integer j = 1; j <= my nx; j ++) {
			my z [i] [j] /= windowFactor;
		}
	}
}

static autoSpectrum Sound_to_Spectrum_power (Sound me) {
	try {
		autoSpectrum thee = Sound_to_Spectrum (me, true);
		double scale = 2.0 * thy dx / (my xmax - my xmin);

		// factor '2' because we combine positive and negative frequencies
		// thy dx : width of frequency bin
		// my xmax - my xmin : duration of sound

		double *re = thy z [1], *im = thy z [2];
		for (integer i = 1; i <= thy nx; i ++) {
			double power = scale * (re [i] * re [i] + im [i] * im [i]);
			re [i] = power; im [i] = 0;
		}

		// Correction of frequency bins at 0 Hz and nyquist: don't count for two.

		re [1] *= 0.5; re [thy nx] *= 0.5;
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Spectrum with spectral power created.");
	}
}

static void Sound_into_BarkSpectrogram_frame (Sound me, BarkSpectrogram thee, integer frame) {
	autoSpectrum him = Sound_to_Spectrum_power (me);
	integer numberOfFrequencies = his nx;
	autoNUMvector<double> z (1, numberOfFrequencies);

	for (integer ifreq = 1; ifreq <= numberOfFrequencies; ifreq ++) {
		double fhz = his x1 + (ifreq - 1) * his dx;
		z [ifreq] = thy v_hertzToFrequency (fhz);
	}

	for (integer i = 1; i <= thy ny; i ++) {
		double p = 0;
		double z0 = thy y1 + (i - 1) * thy dy;
		double *pow = his z [1]; // TODO ??
		for (integer ifreq = 1; ifreq <= numberOfFrequencies; ifreq ++) {
			// Sekey & Hanson filter is defined in the power domain.
			// We therefore multiply the power with a (and not a^2).
			// integral (F(z),z=0..25) = 1.58/9

			double a = NUMsekeyhansonfilter_amplitude (z0, z [ifreq]);
			p += a * pow [ifreq] ;
		}
		thy z [i] [frame] = p;
	}
}

autoBarkSpectrogram Sound_to_BarkSpectrogram (Sound me, double analysisWidth, double dt, double f1_bark, double fmax_bark, double df_bark) {
	try {
		double nyquist = 0.5 / my dx, samplingFrequency = 2 * nyquist;
		double windowDuration = 2.0 * analysisWidth; /* gaussian window */
		double zmax = NUMhertzToBark2 (nyquist);
		double fmin_bark = 0;

		// Check defaults.

		if (f1_bark <= 0.0) {
			f1_bark = 1.0;
		}
		if (fmax_bark <= 0.0) {
			fmax_bark = zmax;
		}
		if (df_bark <= 0) {
			df_bark = 1.0;
		}

		fmax_bark = std::min (fmax_bark, zmax);
		integer numberOfFilters = Melder_iround ( (fmax_bark - f1_bark) / df_bark);
		if (numberOfFilters <= 0) {
			Melder_throw (U"The combination of filter parameters is not valid.");
		}

		integer numberOfFrames;
		double t1;
		Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		autoBarkSpectrogram thee = BarkSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_bark, fmax_bark, numberOfFilters, df_bark, f1_bark);

		autoMelderProgress progess (U"BarkSpectrogram analysis");

		for (integer iframe = 1; iframe <= numberOfFrames; iframe ++) {
			double t = Sampled_indexToX (thee.get(), iframe);

			Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
			Sounds_multiply (sframe.get(), window.get());
			Sound_into_BarkSpectrogram_frame (sframe.get(), thee.get(), iframe);

			if (iframe % 10 == 1) {
				Melder_progress ( (double) iframe / numberOfFrames,  U"BarkSpectrogram analysis: frame ",
					iframe, U" from ", numberOfFrames, U".");
			}
		}
		
		_Spectrogram_windowCorrection ((Spectrogram) thee.get(), window -> nx);

		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no BarkSpectrogram created.");
	}
}

static void Sound_into_MelSpectrogram_frame (Sound me, MelSpectrogram thee, integer frame) {
	autoSpectrum him = Sound_to_Spectrum_power (me);

	for (integer ifilter = 1; ifilter <= thy ny; ifilter ++) {
		double power = 0;
		double fc_mel = thy y1 + (ifilter - 1) * thy dy;
		double fc_hz = thy v_frequencyToHertz (fc_mel);
		double fl_hz = thy v_frequencyToHertz (fc_mel - thy dy);
		double fh_hz =  thy v_frequencyToHertz (fc_mel + thy dy);
		integer ifrom, ito;
		Sampled_getWindowSamples (him.get(), fl_hz, fh_hz, & ifrom, & ito);
		for (integer i = ifrom; i <= ito; i ++) {
			// Bin with a triangular filter the power (= amplitude-squared)

			double f = his x1 + (i - 1) * his dx;
			double a = NUMtriangularfilter_amplitude (fl_hz, fc_hz, fh_hz, f);
			power += a * his z [1] [i];
		}
		thy z [ifilter] [frame] = power;
	}
}

autoMelSpectrogram Sound_to_MelSpectrogram (Sound me, double analysisWidth, double dt, double f1_mel, double fmax_mel, double df_mel) {
	try {
		double samplingFrequency = 1.0 / my dx, nyquist = 0.5 * samplingFrequency;
		double windowDuration = 2.0 * analysisWidth;   // Gaussian window
		double fmin_mel = 0.0;
		double fbottom = NUMhertzToMel2 (100.0), fceiling = NUMhertzToMel2 (nyquist);

		// Check defaults.

		if (fmax_mel <= 0.0 || fmax_mel > fceiling) {
			fmax_mel = fceiling;
		}
		if (fmax_mel <= f1_mel) {
			f1_mel = fbottom; fmax_mel = fceiling;
		}
		if (f1_mel <= 0.0) {
			f1_mel = fbottom;
		}
		if (df_mel <= 0.0) {
			df_mel = 100.0;
		}

		// Determine the number of filters.

		integer numberOfFilters = Melder_iround ((fmax_mel - f1_mel) / df_mel);
		fmax_mel = f1_mel + numberOfFilters * df_mel;

		integer numberOfFrames;
		double t1;
		Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		autoMelSpectrogram thee = MelSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_mel, fmax_mel, numberOfFilters, df_mel, f1_mel);

		autoMelderProgress progress (U"MelSpectrograms analysis");

		for (integer iframe = 1; iframe <= numberOfFrames; iframe ++) {
			double t = Sampled_indexToX (thee.get(), iframe);
			Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
			Sounds_multiply (sframe.get(), window.get());
			Sound_into_MelSpectrogram_frame (sframe.get(), thee.get(), iframe);
			
			if (iframe % 10 == 1) {
				Melder_progress ((double) iframe / numberOfFrames, U"Frame ", iframe, U" out of ", numberOfFrames, U".");
			}
		}
		
		_Spectrogram_windowCorrection ((Spectrogram) thee.get(), window -> nx);

		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no MelSpectrogram created.");
	}
}

/*
	Analog formant filter response :
	H(f) = i f B / (f1^2 - f^2 + i f B)
*/
static int Sound_into_Spectrogram_frame (Sound me, Spectrogram thee, integer frame, double bw) {
	Melder_assert (bw > 0);
	autoSpectrum him = Sound_to_Spectrum_power (me);

	for (integer ifilter = 1; ifilter <= thy ny; ifilter ++) {
		double p = 0;
		double fc = thy y1 + (ifilter - 1) * thy dy;
		double *pow = his z [1];
		for (integer ifreq = 1; ifreq <= his nx; ifreq ++) {
			/*
				H(f) = ifB / (fc^2 - f^2 + ifB)
				H(f)| = fB / sqrt ((fc^2 - f^2)^2 + f^2B^2)
				|H(f)|^2 = f^2B^2 / ((fc^2 - f^2)^2 + f^2B^2)
						 = 1 / (((fc^2 - f^2) /fB)^2 + 1)
			*/
			double f = his x1 + (ifreq - 1) * his dx;
			double a = NUMformantfilter_amplitude (fc, bw, f);
			p += a * pow [ifreq];
		}
		thy z [ifilter] [frame] = p;
	}
	return 1;
}

autoSpectrogram Sound_to_Spectrogram_pitchDependent (Sound me, double analysisWidth, double dt, double f1_hz, double fmax_hz, double df_hz, double relative_bw, double minimumPitch, double maximumPitch) {
	try {
		double floor = 80.0, ceiling = 600.0;
		if (minimumPitch >= maximumPitch) {
			minimumPitch = floor;
			maximumPitch = ceiling;
		}
		if (minimumPitch <= 0.0) {
			minimumPitch = floor;
		}
		if (maximumPitch <= 0.0) {
			maximumPitch = ceiling;
		}

		autoPitch thee = Sound_to_Pitch (me, dt, minimumPitch, maximumPitch);
		autoSpectrogram ff = Sound_Pitch_to_Spectrogram (me, thee.get(), analysisWidth, dt, f1_hz, fmax_hz, df_hz, relative_bw);
		return ff;
	} catch (MelderError) {
		Melder_throw (me, U": no Spectrogram created.");
	}
}

autoSpectrogram Sound_Pitch_to_Spectrogram (Sound me, Pitch thee, double analysisWidth, double dt, double f1_hz, double fmax_hz, double df_hz, double relative_bw) {
	try {
		double t1, windowDuration = 2.0 * analysisWidth; /* gaussian window */
		double nyquist = 0.5 / my dx, samplingFrequency = 2.0 * nyquist, fmin_hz = 0.0;
		integer numberOfFrames, numberOfUndefinedPitchFrames = 0;

		Melder_require (my xmin >= thy xmin && my xmax <= thy xmax,
			U"The domain of the Sound should be included in the domain of the Pitch.");

		double f0_median = Pitch_getQuantile (thee, thy xmin, thy xmax, 0.5, kPitch_unit::HERTZ);

		if (isundef (f0_median) || f0_median == 0.0) {
			f0_median = 100.0;
			Melder_warning (U"Pitch values undefined. Bandwith fixed to 100 Hz. ");
		}

		if (f1_hz <= 0.0) {
			f1_hz = 100.0;
		}
		if (fmax_hz <= 0.0) {
			fmax_hz = nyquist;
		}
		if (df_hz <= 0.0) {
			df_hz = f0_median / 2.0;
		}
		if (relative_bw <= 0.0) {
			relative_bw = 1.1;
		}

		fmax_hz = std::min (fmax_hz, nyquist);
		integer numberOfFilters = Melder_iround ( (fmax_hz - f1_hz) / df_hz);

		Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
		autoSpectrogram him = Spectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_hz, fmax_hz, numberOfFilters, df_hz, f1_hz);

		// Temporary objects

		autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
		autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
		autoMelderProgress progress (U"Sound & Pitch: To FormantFilter");
		for (integer iframe = 1; iframe <= numberOfFrames; iframe ++) {
			double t = Sampled_indexToX (him.get(), iframe);
			double b, f0 = Pitch_getValueAtTime (thee, t, kPitch_unit::HERTZ, 0);

			if (isundef (f0) || f0 == 0.0) {
				numberOfUndefinedPitchFrames ++;
				f0 = f0_median;
			}
			b = relative_bw * f0;
			Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
			Sounds_multiply (sframe.get(), window.get());

			Sound_into_Spectrogram_frame (sframe.get(), him.get(), iframe, b);

			if (iframe % 10 == 1) {
				Melder_progress ((double) iframe / numberOfFrames, U"Frame ", iframe, U" out of ",
					numberOfFrames, U".");
			}
		}
		
		_Spectrogram_windowCorrection (him.get(), window -> nx);

		return him;
	} catch (MelderError) {
		Melder_throw (U"FormantFilter not created from Pitch & FormantFilter.");
	}
}

autoSound BandFilterSpectrogram_as_Sound (BandFilterSpectrogram me, int unit) {
	try {
		autoSound thee = Sound_create (my ny, my xmin, my xmax, my nx, my dx, my x1);
		for (integer i = 1; i <= my ny; i ++) {
			for (integer j = 1; j <= my nx; j ++)
				thy z [i] [j] = my v_getValueAtSample (j, i, unit);
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": no Sound created.");
	}
}

autoSound BandFilterSpectrograms_crossCorrelate (BandFilterSpectrogram me, BandFilterSpectrogram thee, enum kSounds_convolve_scaling scaling, enum kSounds_convolve_signalOutsideTimeDomain signalOutsideTimeDomain) {
	try {
		autoSound sme = BandFilterSpectrogram_as_Sound (me, 1);   // to dB
		autoSound sthee = BandFilterSpectrogram_as_Sound (thee, 1);
		autoSound cc = Sounds_crossCorrelate (sme.get(), sthee.get(), scaling, signalOutsideTimeDomain);
		return cc;
	} catch (MelderError) {
		Melder_throw (me, U" and ", thee, U" not cross-correlated.");
	}
}

autoSound BandFilterSpectrograms_convolve (BandFilterSpectrogram me, BandFilterSpectrogram thee, enum kSounds_convolve_scaling scaling, enum kSounds_convolve_signalOutsideTimeDomain signalOutsideTimeDomain) {
	try {
		autoSound sme = BandFilterSpectrogram_as_Sound (me, 1);   // to dB
		autoSound sthee = BandFilterSpectrogram_as_Sound (thee, 1);
		autoSound cc = Sounds_convolve (sme.get(), sthee.get(), scaling, signalOutsideTimeDomain);
		return cc;
	} catch (MelderError) {
		Melder_throw (me, U" and ", thee, U" not convolved.");
	}
}

/* End of file Sound_and_Spectrogram_extensions.cpp */