cheaptrick.cpp 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. //-----------------------------------------------------------------------------
  2. // Copyright 2012 Masanori Morise
  3. // Author: mmorise [at] yamanashi.ac.jp (Masanori Morise)
  4. // Last update: 2017/02/01
  5. //
  6. // Spectral envelope estimation on the basis of the idea of CheapTrick.
  7. //-----------------------------------------------------------------------------
  8. #include "world/cheaptrick.h"
  9. #include <math.h>
  10. #include "world/common.h"
  11. #include "world/constantnumbers.h"
  12. #include "world/matlabfunctions.h"
  13. namespace {
  14. //-----------------------------------------------------------------------------
  15. // SmoothingWithRecovery() carries out the spectral smoothing and spectral
  16. // recovery on the Cepstrum domain.
  17. //-----------------------------------------------------------------------------
  18. static void SmoothingWithRecovery(double f0, int fs, int fft_size, double q1,
  19. const ForwardRealFFT *forward_real_fft,
  20. const InverseRealFFT *inverse_real_fft, double *spectral_envelope) {
  21. double *smoothing_lifter = new double[fft_size];
  22. double *compensation_lifter = new double[fft_size];
  23. smoothing_lifter[0] = 1.0;
  24. compensation_lifter[0] = (1.0 - 2.0 * q1) + 2.0 * q1;
  25. double quefrency;
  26. for (int i = 1; i <= forward_real_fft->fft_size / 2; ++i) {
  27. quefrency = static_cast<double>(i) / fs;
  28. smoothing_lifter[i] = sin(world::kPi * f0 * quefrency) /
  29. (world::kPi * f0 * quefrency);
  30. compensation_lifter[i] = (1.0 - 2.0 * q1) + 2.0 * q1 *
  31. cos(2.0 * world::kPi * quefrency * f0);
  32. }
  33. for (int i = 0; i <= fft_size / 2; ++i)
  34. forward_real_fft->waveform[i] = log(forward_real_fft->waveform[i]);
  35. for (int i = 1; i < fft_size / 2; ++i)
  36. forward_real_fft->waveform[fft_size - i] = forward_real_fft->waveform[i];
  37. fft_execute(forward_real_fft->forward_fft);
  38. for (int i = 0; i <= fft_size / 2; ++i) {
  39. inverse_real_fft->spectrum[i][0] = forward_real_fft->spectrum[i][0] *
  40. smoothing_lifter[i] * compensation_lifter[i] / fft_size;
  41. inverse_real_fft->spectrum[i][1] = 0.0;
  42. }
  43. fft_execute(inverse_real_fft->inverse_fft);
  44. for (int i = 0; i <= fft_size / 2; ++i)
  45. spectral_envelope[i] = exp(inverse_real_fft->waveform[i]);
  46. delete[] smoothing_lifter;
  47. delete[] compensation_lifter;
  48. }
  49. //-----------------------------------------------------------------------------
  50. // GetPowerSpectrum() calculates the power_spectrum with DC correction.
  51. // DC stands for Direct Current. In this case, the component from 0 to F0 Hz
  52. // is corrected.
  53. //-----------------------------------------------------------------------------
  54. static void GetPowerSpectrum(int fs, double f0, int fft_size,
  55. const ForwardRealFFT *forward_real_fft) {
  56. int half_window_length = matlab_round(1.5 * fs / f0);
  57. // FFT
  58. for (int i = half_window_length * 2 + 1; i < fft_size; ++i)
  59. forward_real_fft->waveform[i] = 0.0;
  60. fft_execute(forward_real_fft->forward_fft);
  61. // Calculation of the power spectrum.
  62. double *power_spectrum = forward_real_fft->waveform;
  63. for (int i = 0; i <= fft_size / 2; ++i)
  64. power_spectrum[i] =
  65. forward_real_fft->spectrum[i][0] * forward_real_fft->spectrum[i][0] +
  66. forward_real_fft->spectrum[i][1] * forward_real_fft->spectrum[i][1];
  67. // DC correction
  68. DCCorrection(power_spectrum, f0, fs, fft_size, power_spectrum);
  69. }
  70. //-----------------------------------------------------------------------------
  71. // SetParametersForGetWindowedWaveform()
  72. //-----------------------------------------------------------------------------
  73. static void SetParametersForGetWindowedWaveform(int half_window_length,
  74. int x_length, double currnet_position, int fs, double current_f0,
  75. int *base_index, int *safe_index, double *window) {
  76. for (int i = -half_window_length; i <= half_window_length; ++i)
  77. base_index[i + half_window_length] = i;
  78. int origin = matlab_round(currnet_position * fs + 0.001);
  79. for (int i = 0; i <= half_window_length * 2; ++i)
  80. safe_index[i] =
  81. MyMinInt(x_length - 1, MyMaxInt(0, origin + base_index[i]));
  82. // Designing of the window function
  83. double average = 0.0;
  84. double position;
  85. for (int i = 0; i <= half_window_length * 2; ++i) {
  86. position = base_index[i] / 1.5 / fs;
  87. window[i] = 0.5 * cos(world::kPi * position * current_f0) + 0.5;
  88. average += window[i] * window[i];
  89. }
  90. average = sqrt(average);
  91. for (int i = 0; i <= half_window_length * 2; ++i) window[i] /= average;
  92. }
  93. //-----------------------------------------------------------------------------
  94. // GetWindowedWaveform() windows the waveform by F0-adaptive window
  95. //-----------------------------------------------------------------------------
  96. static void GetWindowedWaveform(const double *x, int x_length, int fs,
  97. double current_f0, double currnet_position,
  98. const ForwardRealFFT *forward_real_fft) {
  99. int half_window_length = matlab_round(1.5 * fs / current_f0);
  100. int *base_index = new int[half_window_length * 2 + 1];
  101. int *safe_index = new int[half_window_length * 2 + 1];
  102. double *window = new double[half_window_length * 2 + 1];
  103. SetParametersForGetWindowedWaveform(half_window_length, x_length,
  104. currnet_position, fs, current_f0, base_index, safe_index, window);
  105. // F0-adaptive windowing
  106. double *waveform = forward_real_fft->waveform;
  107. for (int i = 0; i <= half_window_length * 2; ++i)
  108. waveform[i] = x[safe_index[i]] * window[i] +
  109. randn() * world::kMySafeGuardMinimum;
  110. double tmp_weight1 = 0;
  111. double tmp_weight2 = 0;
  112. for (int i = 0; i <= half_window_length * 2; ++i) {
  113. tmp_weight1 += waveform[i];
  114. tmp_weight2 += window[i];
  115. }
  116. double weighting_coefficient = tmp_weight1 / tmp_weight2;
  117. for (int i = 0; i <= half_window_length * 2; ++i)
  118. waveform[i] -= window[i] * weighting_coefficient;
  119. delete[] base_index;
  120. delete[] safe_index;
  121. delete[] window;
  122. }
  123. //-----------------------------------------------------------------------------
  124. // AddInfinitesimalNoise()
  125. //-----------------------------------------------------------------------------
  126. static void AddInfinitesimalNoise(const double *input_spectrum, int fft_size,
  127. double *output_spectrum) {
  128. for (int i = 0; i <= fft_size / 2; ++i)
  129. output_spectrum[i] = input_spectrum[i] + fabs(randn()) * world::kEps;
  130. }
  131. //-----------------------------------------------------------------------------
  132. // CheapTrickGeneralBody() calculates a spectral envelope at a temporal
  133. // position. This function is only used in CheapTrick().
  134. // Caution:
  135. // forward_fft is allocated in advance to speed up the processing.
  136. //-----------------------------------------------------------------------------
  137. static void CheapTrickGeneralBody(const double *x, int x_length, int fs,
  138. double current_f0, int fft_size, double current_position, double q1,
  139. const ForwardRealFFT *forward_real_fft,
  140. const InverseRealFFT *inverse_real_fft, double *spectral_envelope) {
  141. // F0-adaptive windowing
  142. GetWindowedWaveform(x, x_length, fs, current_f0, current_position,
  143. forward_real_fft);
  144. // Calculate power spectrum with DC correction
  145. // Note: The calculated power spectrum is stored in an array for waveform.
  146. // In this imprementation, power spectrum is transformed by FFT (NOT IFFT).
  147. // However, the same result is obtained.
  148. // This is tricky but important for simple implementation.
  149. GetPowerSpectrum(fs, current_f0, fft_size, forward_real_fft);
  150. // Smoothing of the power (linear axis)
  151. // forward_real_fft.waveform is the power spectrum.
  152. LinearSmoothing(forward_real_fft->waveform, current_f0 * 2.0 / 3.0,
  153. fs, fft_size, forward_real_fft->waveform);
  154. // Add infinitesimal noise
  155. // This is a safeguard to avoid including zero in the spectrum.
  156. AddInfinitesimalNoise(forward_real_fft->waveform, fft_size,
  157. forward_real_fft->waveform);
  158. // Smoothing (log axis) and spectral recovery on the cepstrum domain.
  159. SmoothingWithRecovery(current_f0, fs, fft_size, q1, forward_real_fft,
  160. inverse_real_fft, spectral_envelope);
  161. }
  162. } // namespace
  163. int GetFFTSizeForCheapTrick(int fs, const CheapTrickOption *option) {
  164. return static_cast<int>(pow(2.0, 1.0 +
  165. static_cast<int>(log(3.0 * fs / option->f0_floor + 1) / world::kLog2)));
  166. }
  167. double GetF0FloorForCheapTrick(int fs, int fft_size) {
  168. return 3.0 * fs / (fft_size - 3.0);
  169. }
  170. void CheapTrick(const double *x, int x_length, int fs,
  171. const double *temporal_positions, const double *f0, int f0_length,
  172. const CheapTrickOption *option, double **spectrogram) {
  173. int fft_size = option->fft_size;
  174. randn_reseed();
  175. double f0_floor = GetF0FloorForCheapTrick(fs, fft_size);
  176. double *spectral_envelope = new double[fft_size];
  177. ForwardRealFFT forward_real_fft = {0};
  178. InitializeForwardRealFFT(fft_size, &forward_real_fft);
  179. InverseRealFFT inverse_real_fft = {0};
  180. InitializeInverseRealFFT(fft_size, &inverse_real_fft);
  181. double current_f0;
  182. for (int i = 0; i < f0_length; ++i) {
  183. current_f0 = f0[i] <= f0_floor ? world::kDefaultF0 : f0[i];
  184. CheapTrickGeneralBody(x, x_length, fs, current_f0, fft_size,
  185. temporal_positions[i], option->q1, &forward_real_fft,
  186. &inverse_real_fft, spectral_envelope);
  187. for (int j = 0; j <= fft_size / 2; ++j)
  188. spectrogram[i][j] = spectral_envelope[j];
  189. }
  190. DestroyForwardRealFFT(&forward_real_fft);
  191. DestroyInverseRealFFT(&inverse_real_fft);
  192. delete[] spectral_envelope;
  193. }
  194. void InitializeCheapTrickOption(int fs, CheapTrickOption *option) {
  195. // q1 is the parameter used for the spectral recovery.
  196. // Since The parameter is optimized, you don't need to change the parameter.
  197. option->q1 = -0.15;
  198. // f0_floor and fs are used to determine fft_size;
  199. // We strongly recommend not to change this value unless you have enough
  200. // knowledge of the signal processing in CheapTrick.
  201. option->f0_floor = world::kFloorF0;
  202. option->fft_size = GetFFTSizeForCheapTrick(fs, option);
  203. }