Sound_and_Spectrogram_extensions.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. /* Sound_and_Spectrogram_extensions.cpp
  2. *
  3. * Copyright (C) 1993-2017 David Weenink
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. djmw 20010718
  20. djmw 20020813 GPL header.
  21. djmw 20041124 Changed call to Sound_to_Spectrum.
  22. djmw 20070103 Sound interface changes
  23. djmw 20071107 Errors/warnings text changes
  24. djmw 20071202 Melder_warning<n>
  25. */
  26. #include "Sound_and_Spectrogram_extensions.h"
  27. #include "Sound_extensions.h"
  28. #include "Sound_and_Spectrum.h"
  29. #include "Sound_to_Pitch.h"
  30. #include "Vector.h"
  31. #include "NUM2.h"
  32. autoSound BandFilterSpectrogram_as_Sound (BandFilterSpectrogram me, int to_dB);
  33. /*
  34. The gaussian(x) = (exp(-48*((i-(n+1)/2)/(n+1))^2)-exp(-12))/(1-exp(-12));
  35. For power we need the area under the square of this window:
  36. Integrate (gaussian(i)^2,i=1..n) =
  37. (sqrt(Pi)*sqrt(3)*sqrt(2)*erf(2*(n-1)*sqrt(3)*sqrt(2)/(n+1))*(n+1) + 24*exp(-24)*(n-1)+
  38. -4*sqrt(Pi)*sqrt(3)*exp(-12)*erf(2*(n-1)*sqrt(3)/(n+1))*(n+1))/ (24 * (-1+exp(-12))^2),
  39. where erf(x) = 1 - erfc(x) and n is the windowLength in samples.
  40. To compare with the rectangular window we need to divide this by the window width (n -1) x 1^2.
  41. */
  42. static void _Spectrogram_windowCorrection (Spectrogram me, integer numberOfSamples_window) {
  43. double windowFactor = 1.0;
  44. if (numberOfSamples_window > 1) {
  45. double e12 = exp (-12);
  46. double denum = (e12 - 1) * (e12 - 1.0) * 24 * (numberOfSamples_window - 1);
  47. double arg1 = 2.0 * NUMsqrt3 * (numberOfSamples_window - 1) / (numberOfSamples_window + 1);
  48. double arg2 = arg1 * NUMsqrt2;
  49. double p2 = NUMsqrtpi * NUMsqrt3 * NUMsqrt2 * (1 - NUMerfcc (arg2)) * (numberOfSamples_window + 1);
  50. double p1 = 4 * NUMsqrtpi * NUMsqrt3 * e12 * (1 - NUMerfcc (arg1)) * (numberOfSamples_window + 1);
  51. windowFactor = (p2 - p1 + 24 * (numberOfSamples_window - 1) * e12 * e12) / denum;
  52. }
  53. for (integer i = 1; i <= my ny; i ++) {
  54. for (integer j = 1; j <= my nx; j ++) {
  55. my z [i] [j] /= windowFactor;
  56. }
  57. }
  58. }
  59. static autoSpectrum Sound_to_Spectrum_power (Sound me) {
  60. try {
  61. autoSpectrum thee = Sound_to_Spectrum (me, true);
  62. double scale = 2.0 * thy dx / (my xmax - my xmin);
  63. // factor '2' because we combine positive and negative frequencies
  64. // thy dx : width of frequency bin
  65. // my xmax - my xmin : duration of sound
  66. double *re = thy z [1], *im = thy z [2];
  67. for (integer i = 1; i <= thy nx; i ++) {
  68. double power = scale * (re [i] * re [i] + im [i] * im [i]);
  69. re [i] = power; im [i] = 0;
  70. }
  71. // Correction of frequency bins at 0 Hz and nyquist: don't count for two.
  72. re [1] *= 0.5; re [thy nx] *= 0.5;
  73. return thee;
  74. } catch (MelderError) {
  75. Melder_throw (me, U": no Spectrum with spectral power created.");
  76. }
  77. }
  78. static void Sound_into_BarkSpectrogram_frame (Sound me, BarkSpectrogram thee, integer frame) {
  79. autoSpectrum him = Sound_to_Spectrum_power (me);
  80. integer numberOfFrequencies = his nx;
  81. autoNUMvector<double> z (1, numberOfFrequencies);
  82. for (integer ifreq = 1; ifreq <= numberOfFrequencies; ifreq ++) {
  83. double fhz = his x1 + (ifreq - 1) * his dx;
  84. z [ifreq] = thy v_hertzToFrequency (fhz);
  85. }
  86. for (integer i = 1; i <= thy ny; i ++) {
  87. double p = 0;
  88. double z0 = thy y1 + (i - 1) * thy dy;
  89. double *pow = his z [1]; // TODO ??
  90. for (integer ifreq = 1; ifreq <= numberOfFrequencies; ifreq ++) {
  91. // Sekey & Hanson filter is defined in the power domain.
  92. // We therefore multiply the power with a (and not a^2).
  93. // integral (F(z),z=0..25) = 1.58/9
  94. double a = NUMsekeyhansonfilter_amplitude (z0, z [ifreq]);
  95. p += a * pow [ifreq] ;
  96. }
  97. thy z [i] [frame] = p;
  98. }
  99. }
  100. autoBarkSpectrogram Sound_to_BarkSpectrogram (Sound me, double analysisWidth, double dt, double f1_bark, double fmax_bark, double df_bark) {
  101. try {
  102. double nyquist = 0.5 / my dx, samplingFrequency = 2 * nyquist;
  103. double windowDuration = 2.0 * analysisWidth; /* gaussian window */
  104. double zmax = NUMhertzToBark2 (nyquist);
  105. double fmin_bark = 0;
  106. // Check defaults.
  107. if (f1_bark <= 0.0) {
  108. f1_bark = 1.0;
  109. }
  110. if (fmax_bark <= 0.0) {
  111. fmax_bark = zmax;
  112. }
  113. if (df_bark <= 0) {
  114. df_bark = 1.0;
  115. }
  116. fmax_bark = std::min (fmax_bark, zmax);
  117. integer numberOfFilters = Melder_iround ( (fmax_bark - f1_bark) / df_bark);
  118. if (numberOfFilters <= 0) {
  119. Melder_throw (U"The combination of filter parameters is not valid.");
  120. }
  121. integer numberOfFrames;
  122. double t1;
  123. Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
  124. autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
  125. autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
  126. autoBarkSpectrogram thee = BarkSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_bark, fmax_bark, numberOfFilters, df_bark, f1_bark);
  127. autoMelderProgress progess (U"BarkSpectrogram analysis");
  128. for (integer iframe = 1; iframe <= numberOfFrames; iframe ++) {
  129. double t = Sampled_indexToX (thee.get(), iframe);
  130. Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
  131. Sounds_multiply (sframe.get(), window.get());
  132. Sound_into_BarkSpectrogram_frame (sframe.get(), thee.get(), iframe);
  133. if (iframe % 10 == 1) {
  134. Melder_progress ( (double) iframe / numberOfFrames, U"BarkSpectrogram analysis: frame ",
  135. iframe, U" from ", numberOfFrames, U".");
  136. }
  137. }
  138. _Spectrogram_windowCorrection ((Spectrogram) thee.get(), window -> nx);
  139. return thee;
  140. } catch (MelderError) {
  141. Melder_throw (me, U": no BarkSpectrogram created.");
  142. }
  143. }
  144. static void Sound_into_MelSpectrogram_frame (Sound me, MelSpectrogram thee, integer frame) {
  145. autoSpectrum him = Sound_to_Spectrum_power (me);
  146. for (integer ifilter = 1; ifilter <= thy ny; ifilter ++) {
  147. double power = 0;
  148. double fc_mel = thy y1 + (ifilter - 1) * thy dy;
  149. double fc_hz = thy v_frequencyToHertz (fc_mel);
  150. double fl_hz = thy v_frequencyToHertz (fc_mel - thy dy);
  151. double fh_hz = thy v_frequencyToHertz (fc_mel + thy dy);
  152. integer ifrom, ito;
  153. Sampled_getWindowSamples (him.get(), fl_hz, fh_hz, & ifrom, & ito);
  154. for (integer i = ifrom; i <= ito; i ++) {
  155. // Bin with a triangular filter the power (= amplitude-squared)
  156. double f = his x1 + (i - 1) * his dx;
  157. double a = NUMtriangularfilter_amplitude (fl_hz, fc_hz, fh_hz, f);
  158. power += a * his z [1] [i];
  159. }
  160. thy z [ifilter] [frame] = power;
  161. }
  162. }
  163. autoMelSpectrogram Sound_to_MelSpectrogram (Sound me, double analysisWidth, double dt, double f1_mel, double fmax_mel, double df_mel) {
  164. try {
  165. double samplingFrequency = 1.0 / my dx, nyquist = 0.5 * samplingFrequency;
  166. double windowDuration = 2.0 * analysisWidth; // Gaussian window
  167. double fmin_mel = 0.0;
  168. double fbottom = NUMhertzToMel2 (100.0), fceiling = NUMhertzToMel2 (nyquist);
  169. // Check defaults.
  170. if (fmax_mel <= 0.0 || fmax_mel > fceiling) {
  171. fmax_mel = fceiling;
  172. }
  173. if (fmax_mel <= f1_mel) {
  174. f1_mel = fbottom; fmax_mel = fceiling;
  175. }
  176. if (f1_mel <= 0.0) {
  177. f1_mel = fbottom;
  178. }
  179. if (df_mel <= 0.0) {
  180. df_mel = 100.0;
  181. }
  182. // Determine the number of filters.
  183. integer numberOfFilters = Melder_iround ((fmax_mel - f1_mel) / df_mel);
  184. fmax_mel = f1_mel + numberOfFilters * df_mel;
  185. integer numberOfFrames;
  186. double t1;
  187. Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
  188. autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
  189. autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
  190. autoMelSpectrogram thee = MelSpectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_mel, fmax_mel, numberOfFilters, df_mel, f1_mel);
  191. autoMelderProgress progress (U"MelSpectrograms analysis");
  192. for (integer iframe = 1; iframe <= numberOfFrames; iframe ++) {
  193. double t = Sampled_indexToX (thee.get(), iframe);
  194. Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
  195. Sounds_multiply (sframe.get(), window.get());
  196. Sound_into_MelSpectrogram_frame (sframe.get(), thee.get(), iframe);
  197. if (iframe % 10 == 1) {
  198. Melder_progress ((double) iframe / numberOfFrames, U"Frame ", iframe, U" out of ", numberOfFrames, U".");
  199. }
  200. }
  201. _Spectrogram_windowCorrection ((Spectrogram) thee.get(), window -> nx);
  202. return thee;
  203. } catch (MelderError) {
  204. Melder_throw (me, U": no MelSpectrogram created.");
  205. }
  206. }
  207. /*
  208. Analog formant filter response :
  209. H(f) = i f B / (f1^2 - f^2 + i f B)
  210. */
  211. static int Sound_into_Spectrogram_frame (Sound me, Spectrogram thee, integer frame, double bw) {
  212. Melder_assert (bw > 0);
  213. autoSpectrum him = Sound_to_Spectrum_power (me);
  214. for (integer ifilter = 1; ifilter <= thy ny; ifilter ++) {
  215. double p = 0;
  216. double fc = thy y1 + (ifilter - 1) * thy dy;
  217. double *pow = his z [1];
  218. for (integer ifreq = 1; ifreq <= his nx; ifreq ++) {
  219. /*
  220. H(f) = ifB / (fc^2 - f^2 + ifB)
  221. H(f)| = fB / sqrt ((fc^2 - f^2)^2 + f^2B^2)
  222. |H(f)|^2 = f^2B^2 / ((fc^2 - f^2)^2 + f^2B^2)
  223. = 1 / (((fc^2 - f^2) /fB)^2 + 1)
  224. */
  225. double f = his x1 + (ifreq - 1) * his dx;
  226. double a = NUMformantfilter_amplitude (fc, bw, f);
  227. p += a * pow [ifreq];
  228. }
  229. thy z [ifilter] [frame] = p;
  230. }
  231. return 1;
  232. }
  233. autoSpectrogram Sound_to_Spectrogram_pitchDependent (Sound me, double analysisWidth, double dt, double f1_hz, double fmax_hz, double df_hz, double relative_bw, double minimumPitch, double maximumPitch) {
  234. try {
  235. double floor = 80.0, ceiling = 600.0;
  236. if (minimumPitch >= maximumPitch) {
  237. minimumPitch = floor;
  238. maximumPitch = ceiling;
  239. }
  240. if (minimumPitch <= 0.0) {
  241. minimumPitch = floor;
  242. }
  243. if (maximumPitch <= 0.0) {
  244. maximumPitch = ceiling;
  245. }
  246. autoPitch thee = Sound_to_Pitch (me, dt, minimumPitch, maximumPitch);
  247. autoSpectrogram ff = Sound_Pitch_to_Spectrogram (me, thee.get(), analysisWidth, dt, f1_hz, fmax_hz, df_hz, relative_bw);
  248. return ff;
  249. } catch (MelderError) {
  250. Melder_throw (me, U": no Spectrogram created.");
  251. }
  252. }
  253. autoSpectrogram Sound_Pitch_to_Spectrogram (Sound me, Pitch thee, double analysisWidth, double dt, double f1_hz, double fmax_hz, double df_hz, double relative_bw) {
  254. try {
  255. double t1, windowDuration = 2.0 * analysisWidth; /* gaussian window */
  256. double nyquist = 0.5 / my dx, samplingFrequency = 2.0 * nyquist, fmin_hz = 0.0;
  257. integer numberOfFrames, numberOfUndefinedPitchFrames = 0;
  258. Melder_require (my xmin >= thy xmin && my xmax <= thy xmax,
  259. U"The domain of the Sound should be included in the domain of the Pitch.");
  260. double f0_median = Pitch_getQuantile (thee, thy xmin, thy xmax, 0.5, kPitch_unit::HERTZ);
  261. if (isundef (f0_median) || f0_median == 0.0) {
  262. f0_median = 100.0;
  263. Melder_warning (U"Pitch values undefined. Bandwith fixed to 100 Hz. ");
  264. }
  265. if (f1_hz <= 0.0) {
  266. f1_hz = 100.0;
  267. }
  268. if (fmax_hz <= 0.0) {
  269. fmax_hz = nyquist;
  270. }
  271. if (df_hz <= 0.0) {
  272. df_hz = f0_median / 2.0;
  273. }
  274. if (relative_bw <= 0.0) {
  275. relative_bw = 1.1;
  276. }
  277. fmax_hz = std::min (fmax_hz, nyquist);
  278. integer numberOfFilters = Melder_iround ( (fmax_hz - f1_hz) / df_hz);
  279. Sampled_shortTermAnalysis (me, windowDuration, dt, & numberOfFrames, & t1);
  280. autoSpectrogram him = Spectrogram_create (my xmin, my xmax, numberOfFrames, dt, t1, fmin_hz, fmax_hz, numberOfFilters, df_hz, f1_hz);
  281. // Temporary objects
  282. autoSound sframe = Sound_createSimple (1, windowDuration, samplingFrequency);
  283. autoSound window = Sound_createGaussian (windowDuration, samplingFrequency);
  284. autoMelderProgress progress (U"Sound & Pitch: To FormantFilter");
  285. for (integer iframe = 1; iframe <= numberOfFrames; iframe ++) {
  286. double t = Sampled_indexToX (him.get(), iframe);
  287. double b, f0 = Pitch_getValueAtTime (thee, t, kPitch_unit::HERTZ, 0);
  288. if (isundef (f0) || f0 == 0.0) {
  289. numberOfUndefinedPitchFrames ++;
  290. f0 = f0_median;
  291. }
  292. b = relative_bw * f0;
  293. Sound_into_Sound (me, sframe.get(), t - windowDuration / 2.0);
  294. Sounds_multiply (sframe.get(), window.get());
  295. Sound_into_Spectrogram_frame (sframe.get(), him.get(), iframe, b);
  296. if (iframe % 10 == 1) {
  297. Melder_progress ((double) iframe / numberOfFrames, U"Frame ", iframe, U" out of ",
  298. numberOfFrames, U".");
  299. }
  300. }
  301. _Spectrogram_windowCorrection (him.get(), window -> nx);
  302. return him;
  303. } catch (MelderError) {
  304. Melder_throw (U"FormantFilter not created from Pitch & FormantFilter.");
  305. }
  306. }
  307. autoSound BandFilterSpectrogram_as_Sound (BandFilterSpectrogram me, int unit) {
  308. try {
  309. autoSound thee = Sound_create (my ny, my xmin, my xmax, my nx, my dx, my x1);
  310. for (integer i = 1; i <= my ny; i ++) {
  311. for (integer j = 1; j <= my nx; j ++)
  312. thy z [i] [j] = my v_getValueAtSample (j, i, unit);
  313. }
  314. return thee;
  315. } catch (MelderError) {
  316. Melder_throw (me, U": no Sound created.");
  317. }
  318. }
  319. autoSound BandFilterSpectrograms_crossCorrelate (BandFilterSpectrogram me, BandFilterSpectrogram thee, enum kSounds_convolve_scaling scaling, enum kSounds_convolve_signalOutsideTimeDomain signalOutsideTimeDomain) {
  320. try {
  321. autoSound sme = BandFilterSpectrogram_as_Sound (me, 1); // to dB
  322. autoSound sthee = BandFilterSpectrogram_as_Sound (thee, 1);
  323. autoSound cc = Sounds_crossCorrelate (sme.get(), sthee.get(), scaling, signalOutsideTimeDomain);
  324. return cc;
  325. } catch (MelderError) {
  326. Melder_throw (me, U" and ", thee, U" not cross-correlated.");
  327. }
  328. }
  329. autoSound BandFilterSpectrograms_convolve (BandFilterSpectrogram me, BandFilterSpectrogram thee, enum kSounds_convolve_scaling scaling, enum kSounds_convolve_signalOutsideTimeDomain signalOutsideTimeDomain) {
  330. try {
  331. autoSound sme = BandFilterSpectrogram_as_Sound (me, 1); // to dB
  332. autoSound sthee = BandFilterSpectrogram_as_Sound (thee, 1);
  333. autoSound cc = Sounds_convolve (sme.get(), sthee.get(), scaling, signalOutsideTimeDomain);
  334. return cc;
  335. } catch (MelderError) {
  336. Melder_throw (me, U" and ", thee, U" not convolved.");
  337. }
  338. }
  339. /* End of file Sound_and_Spectrogram_extensions.cpp */