test_mfcc.cpp 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. /*
  2. Sekai - addons for the WORLD speech toolkit
  3. Copyright (C) 2016 Tobias Platen
  4. This program is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  15. // Copyright 2012-2015 Masanori Morise. All Rights Reserved.
  16. // Author: mmorise [at] yamanashi.ac.jp (Masanori Morise)
  17. //
  18. // Test program for WORLD 0.1.2 (2012/08/19)
  19. // Test program for WORLD 0.1.3 (2013/07/26)
  20. // Test program for WORLD 0.1.4 (2014/04/29)
  21. // Test program for WORLD 0.1.4_3 (2015/03/07)
  22. // Test program for WORLD 0.2.0 (2015/05/29)
  23. // Test program for WORLD 0.2.0_1 (2015/05/31)
  24. // Test program for WORLD 0.2.0_2 (2015/06/06)
  25. // Test program for WORLD 0.2.0_3 (2015/07/28)
  26. // test.exe input.wav output.wav f0 spec
  27. // input.wav : Input file
  28. // output.wav : Output file
  29. // f0 : F0 scaling (a positive number)
  30. // spec : Formant scaling (a positive number)
  31. #include <math.h>
  32. #include <stdio.h>
  33. #include <stdlib.h>
  34. #include <string.h>
  35. #if (defined(__WIN32__) || defined(_WIN32)) && !defined(__MINGW32__)
  36. #include <conio.h>
  37. #include <windows.h>
  38. #pragma comment(lib, "winmm.lib")
  39. #pragma warning(disable : 4996)
  40. #endif
  41. #if (defined(__linux__) || defined(__CYGWIN__) || defined(__APPLE__))
  42. #include <stdint.h>
  43. #include <sys/time.h>
  44. #endif
  45. #include "mfcc.h"
  46. #include "note.h"
  47. #include "world/cheaptrick.h"
  48. #include "world/constantnumbers.h"
  49. #include "world/d4c.h" // This is the new function.
  50. #include "world/dio.h"
  51. #include "world/matlabfunctions.h"
  52. #include "world/stonemask.h"
  53. #include "world/synthesis.h"
  54. //// from haruneko
  55. ////
  56. // Frame shift [msec]
  57. #define FRAMEPERIOD 5.0
  #if (defined(__linux__) || defined(__CYGWIN__) || defined(__APPLE__))
  // POSIX porting section: implement timeGetTime() with gettimeofday().
  #ifndef DWORD
  #define DWORD uint32_t
  #endif
  // Returns the current gettimeofday() reading converted to milliseconds and
  // truncated to DWORD. Callers in this file only subtract two readings, and
  // unsigned subtraction stays correct when the truncated counter wraps.
  DWORD timeGetTime() {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    // Widen before multiplying: tv_sec * 1000 evaluated in a signed 32-bit
    // time_t/long overflows, which is undefined behaviour.
    const uint64_t msec = static_cast<uint64_t>(tv.tv_sec) * 1000u +
                          static_cast<uint64_t>(tv.tv_usec) / 1000u;
    return static_cast<DWORD>(msec);
  }
  #endif
  70. namespace {
  // Reports whether a waveform buffer was obtained and, if so, prints a short
  // summary of it.
  //   x        : sample buffer; NULL is treated as a failed load.
  //   fs       : sampling rate [Hz], used for the printed summary.
  //   nbit     : bit depth, used for the printed summary.
  //   x_length : number of samples, used for the printed summary.
  // Returns false (after printing an error line) when x is NULL, true
  // otherwise.
  bool CheckLoadedFile(double *x, int fs, int nbit, int x_length) {
    const bool loaded = (x != NULL);
    if (loaded) {
      const double duration_sec = static_cast<double>(x_length) / fs;
      printf("File information\n");
      printf("Sampling : %d Hz %d Bit\n", fs, nbit);
      printf("Length %d [sample]\n", x_length);
      printf("Length %f [sec]\n", duration_sec);
    } else {
      printf("error: File not found.\n");
    }
    return loaded;
  }
  82. void F0Estimation(double *x, int x_length, int fs, int f0_length, double *f0,
  83. double *time_axis) {
  84. double *refined_f0 = new double[f0_length];
  85. DioOption option;
  86. InitializeDioOption(&option); // Initialize the option
  87. // Modification of the option
  88. option.frame_period = FRAMEPERIOD;
  89. // Valuable option.speed represents the ratio for downsampling.
  90. // The signal is downsampled to fs / speed Hz.
  91. // If you want to obtain the accurate result, speed should be set to 1.
  92. option.speed = 1;
  93. // You should not set option.f0_floor to under world::kFloorF0.
  94. // If you want to analyze such low F0 speech, please change world::kFloorF0.
  95. // Processing speed may sacrify, provided that the FFT length changes.
  96. option.f0_floor = 71.0;
  97. // You can give a positive real number as the threshold.
  98. // Most strict value is 0, but almost all results are counted as unvoiced.
  99. // The value from 0.02 to 0.2 would be reasonable.
  100. option.allowed_range = 0.1;
  101. printf("\nAnalysis\n");
  102. DWORD elapsed_time = timeGetTime();
  103. Dio(x, x_length, fs, option, time_axis, f0);
  104. printf("DIO: %d [msec]\n", timeGetTime() - elapsed_time);
  105. // StoneMask is carried out to improve the estimation performance.
  106. elapsed_time = timeGetTime();
  107. StoneMask(x, x_length, fs, time_axis, f0, f0_length, refined_f0);
  108. printf("StoneMask: %d [msec]\n", timeGetTime() - elapsed_time);
  109. for (int i = 0; i < f0_length; ++i) f0[i] = refined_f0[i];
  110. // for (int i = 0; i < f0_length; ++i) if(f0[i])==) f0[i]=0;
  111. delete[] refined_f0;
  112. return;
  113. }
  114. void SpectralEnvelopeEstimation(double *x, int x_length, int fs,
  115. double *time_axis, double *f0, int f0_length,
  116. double **spectrogram) {
  117. CheapTrickOption option;
  118. InitializeCheapTrickOption(&option); // Initialize the option
  119. option.q1 = -0.15; // This value may be better one for HMM speech synthesis.
  120. DWORD elapsed_time = timeGetTime();
  121. CheapTrick(x, x_length, fs, time_axis, f0, f0_length, &option, spectrogram);
  122. printf("CheapTrick: %d [msec]\n", timeGetTime() - elapsed_time);
  123. int fft_size = GetFFTSizeForCheapTrick(fs);
  124. }
  125. void AperiodicityEstimation(double *x, int x_length, int fs, double *time_axis,
  126. double *f0, int f0_length, int fft_size,
  127. double **aperiodicity) {
  128. D4COption option;
  129. InitializeD4COption(&option); // Initialize the option
  130. DWORD elapsed_time = timeGetTime();
  131. // option is not implemented in this version. This is for future update.
  132. // We can use "NULL" as the argument.
  133. D4C(x, x_length, fs, time_axis, f0, f0_length, fft_size, &option,
  134. aperiodicity);
  135. printf("D4C: %d [msec]\n", timeGetTime() - elapsed_time);
  136. }
  137. void WaveformSynthesis(double *f0, int f0_length, double **spectrogram,
  138. double **aperiodicity, int fft_size, double frame_period,
  139. int fs, int y_length, double *y) {
  140. DWORD elapsed_time;
  141. // Synthesis by the aperiodicity
  142. printf("\nSynthesis\n");
  143. elapsed_time = timeGetTime();
  144. Synthesis(f0, f0_length, spectrogram, aperiodicity, fft_size, FRAMEPERIOD, fs,
  145. y_length, y);
  146. printf("WORLD: %d [msec]\n", timeGetTime() - elapsed_time);
  147. }
  148. void CompressTest(double **spectrogram, double **aperiodicity, int f0_length,
  149. int fs, int fft_size, int cepstrum_length,
  150. float **mel_cepstrum1, float **mel_cepstrum2,
  151. double **spectrogram_test, double **aperiodicity_test) {
  152. DWORD elapsed_time;
  153. // Synthesis by the aperiodicity
  154. printf("\nCompress\n");
  155. elapsed_time = timeGetTime();
  156. MFCCCompress(spectrogram, f0_length, fs, fft_size, cepstrum_length,
  157. mel_cepstrum1);
  158. MFCCCompress(aperiodicity, f0_length, fs, fft_size, cepstrum_length,
  159. mel_cepstrum2);
  160. MFCCDecompress(spectrogram_test, f0_length, fs, fft_size, cepstrum_length,
  161. mel_cepstrum1, false);
  162. MFCCDecompress(aperiodicity_test, f0_length, fs, fft_size, cepstrum_length,
  163. mel_cepstrum2, true);
  164. printf("WORLD: %d [msec]\n", timeGetTime() - elapsed_time);
  165. }
  166. } // namespace
  167. //-----------------------------------------------------------------------------
  168. // Test program.
  169. // test.exe input.wav output.wav
  170. // input.wav : argv[1] Input file
  171. // output.wav : argv[2] Output file
  172. //-----------------------------------------------------------------------------
  173. #include "vvd.h"
  174. int main(int argc, char *argv[]) {
  175. if (argc != 3) {
  176. printf("usage: world_test2 infile.wav outfile.wav\n");
  177. return 0;
  178. }
  179. int fs, nbit, x_length;
  180. double *x = wavread(argv[1], &fs, &nbit, &x_length);
  181. if (CheckLoadedFile(x, fs, nbit, x_length) == false) {
  182. printf("error: File not found.\n");
  183. return 0;
  184. }
  185. // Allocate memories
  186. // The number of samples for F0
  187. int f0_length = GetSamplesForDIO(fs, x_length, FRAMEPERIOD);
  188. double *f0 = new double[f0_length];
  189. double *time_axis = new double[f0_length];
  190. int cepstrum_length = 32;
  191. // FFT size for CheapTrick
  192. int fft_size = GetFFTSizeForCheapTrick(fs);
  193. double **spectrogram = new double *[f0_length];
  194. double **aperiodicity = new double *[f0_length];
  195. double **spectrogram_test = new double *[f0_length];
  196. double **aperiodicity_test = new double *[f0_length];
  197. float **mel_cepstrum1 = new float *[f0_length];
  198. float **mel_cepstrum2 = new float *[f0_length];
  199. for (int i = 0; i < f0_length; ++i) {
  200. spectrogram[i] = new double[fft_size / 2 + 1];
  201. aperiodicity[i] = new double[fft_size / 2 + 1];
  202. spectrogram_test[i] = new double[fft_size / 2];
  203. aperiodicity_test[i] = new double[fft_size / 2];
  204. mel_cepstrum1[i] = new float[cepstrum_length];
  205. mel_cepstrum2[i] = new float[cepstrum_length];
  206. }
  207. // F0 estimation
  208. F0Estimation(x, x_length, fs, f0_length, f0, time_axis);
  209. // Spectral envelope estimation
  210. SpectralEnvelopeEstimation(x, x_length, fs, time_axis, f0, f0_length,
  211. spectrogram);
  212. // Aperiodicity estimation by D4C
  213. AperiodicityEstimation(x, x_length, fs, time_axis, f0, f0_length, fft_size,
  214. aperiodicity);
  215. CompressTest(spectrogram, aperiodicity, f0_length, fs, fft_size,
  216. cepstrum_length, mel_cepstrum1, mel_cepstrum2, spectrogram_test,
  217. aperiodicity_test);
  218. // The length of the output waveform
  219. int y_length =
  220. static_cast<int>((f0_length - 1) * FRAMEPERIOD / 1000.0 * fs) + 1;
  221. double *y = new double[y_length];
  222. // Synthesis
  223. WaveformSynthesis(f0, f0_length, spectrogram_test, aperiodicity_test,
  224. fft_size, FRAMEPERIOD, fs, y_length, y);
  225. // Output
  226. wavwrite(y, y_length, fs, 16, argv[2]);
  227. printf("complete.\n");
  228. printf("complete.\n");
  229. delete[] x;
  230. delete[] time_axis;
  231. delete[] f0;
  232. for (int i = 0; i < f0_length; ++i) {
  233. delete[] spectrogram[i];
  234. delete[] aperiodicity[i];
  235. delete[] mel_cepstrum1[i];
  236. delete[] mel_cepstrum2[i];
  237. }
  238. delete[] spectrogram;
  239. delete[] aperiodicity;
  240. delete[] mel_cepstrum1;
  241. delete[] mel_cepstrum2;
  242. return 0;
  243. }