d4c.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. //-----------------------------------------------------------------------------
  2. // Copyright 2012 Masanori Morise
  3. // Author: mmorise [at] yamanashi.ac.jp (Masanori Morise)
  4. // Last update: 2017/02/01
  5. //
  6. // Band-aperiodicity estimation on the basis of the idea of D4C.
  7. //-----------------------------------------------------------------------------
  8. #include "world/d4c.h"
  9. #include <math.h>
  10. #include <algorithm> // for std::sort()
  11. #include "world/common.h"
  12. #include "world/constantnumbers.h"
  13. #include "world/matlabfunctions.h"
  14. namespace {
  15. //-----------------------------------------------------------------------------
  16. // SetParametersForGetWindowedWaveform()
  17. //-----------------------------------------------------------------------------
  18. static void SetParametersForGetWindowedWaveform(int half_window_length,
  19. int x_length, double current_position, int fs, double current_f0,
  20. int window_type, double window_length_ratio, int *base_index,
  21. int *safe_index, double *window) {
  22. for (int i = -half_window_length; i <= half_window_length; ++i)
  23. base_index[i + half_window_length] = i;
  24. int origin = matlab_round(current_position * fs + 0.001);
  25. for (int i = 0; i <= half_window_length * 2; ++i)
  26. safe_index[i] =
  27. MyMinInt(x_length - 1, MyMaxInt(0, origin + base_index[i]));
  28. // Designing of the window function
  29. double position;
  30. if (window_type == world::kHanning) { // Hanning window
  31. for (int i = 0; i <= half_window_length * 2; ++i) {
  32. position = (2.0 * base_index[i] / window_length_ratio) / fs;
  33. window[i] = 0.5 * cos(world::kPi * position * current_f0) + 0.5;
  34. }
  35. } else { // Blackman window
  36. for (int i = 0; i <= half_window_length * 2; ++i) {
  37. position = (2.0 * base_index[i] / window_length_ratio) / fs;
  38. window[i] = 0.42 + 0.5 * cos(world::kPi * position * current_f0) +
  39. 0.08 * cos(world::kPi * position * current_f0 * 2);
  40. }
  41. }
  42. }
  43. //-----------------------------------------------------------------------------
  44. // GetWindowedWaveform() windows the waveform by F0-adaptive window
  45. // In the variable window_type, 1: hanning, 2: blackman
  46. //-----------------------------------------------------------------------------
  47. static void GetWindowedWaveform(const double *x, int x_length, int fs,
  48. double current_f0, double current_position, int window_type,
  49. double window_length_ratio, double *waveform) {
  50. int half_window_length =
  51. matlab_round(window_length_ratio * fs / current_f0 / 2.0);
  52. int *base_index = new int[half_window_length * 2 + 1];
  53. int *safe_index = new int[half_window_length * 2 + 1];
  54. double *window = new double[half_window_length * 2 + 1];
  55. SetParametersForGetWindowedWaveform(half_window_length, x_length,
  56. current_position, fs, current_f0, window_type, window_length_ratio,
  57. base_index, safe_index, window);
  58. // F0-adaptive windowing
  59. for (int i = 0; i <= half_window_length * 2; ++i)
  60. waveform[i] =
  61. x[safe_index[i]] * window[i] + randn() * world::kMySafeGuardMinimum;
  62. double tmp_weight1 = 0;
  63. double tmp_weight2 = 0;
  64. for (int i = 0; i <= half_window_length * 2; ++i) {
  65. tmp_weight1 += waveform[i];
  66. tmp_weight2 += window[i];
  67. }
  68. double weighting_coefficient = tmp_weight1 / tmp_weight2;
  69. for (int i = 0; i <= half_window_length * 2; ++i)
  70. waveform[i] -= window[i] * weighting_coefficient;
  71. delete[] base_index;
  72. delete[] safe_index;
  73. delete[] window;
  74. }
  75. //-----------------------------------------------------------------------------
  76. // GetCentroid() calculates the energy centroid (see the book, time-frequency
  77. // analysis written by L. Cohen).
  78. //-----------------------------------------------------------------------------
  79. static void GetCentroid(const double *x, int x_length, int fs,
  80. double current_f0, int fft_size, double current_position,
  81. const ForwardRealFFT *forward_real_fft, double *centroid) {
  82. for (int i = 0; i < fft_size; ++i) forward_real_fft->waveform[i] = 0.0;
  83. GetWindowedWaveform(x, x_length, fs, current_f0,
  84. current_position, world::kBlackman, 4.0, forward_real_fft->waveform);
  85. double power = 0.0;
  86. for (int i = 0; i <= matlab_round(2.0 * fs / current_f0) * 2; ++i)
  87. power += forward_real_fft->waveform[i] * forward_real_fft->waveform[i];
  88. for (int i = 0; i <= matlab_round(2.0 * fs / current_f0) * 2; ++i)
  89. forward_real_fft->waveform[i] /= sqrt(power);
  90. fft_execute(forward_real_fft->forward_fft);
  91. double *tmp_real = new double[fft_size / 2 + 1];
  92. double *tmp_imag = new double[fft_size / 2 + 1];
  93. for (int i = 0; i <= fft_size / 2; ++i) {
  94. tmp_real[i] = forward_real_fft->spectrum[i][0];
  95. tmp_imag[i] = forward_real_fft->spectrum[i][1];
  96. }
  97. for (int i = 0; i < fft_size; ++i)
  98. forward_real_fft->waveform[i] *= i + 1.0;
  99. fft_execute(forward_real_fft->forward_fft);
  100. for (int i = 0; i <= fft_size / 2; ++i)
  101. centroid[i] = forward_real_fft->spectrum[i][0] * tmp_real[i] +
  102. tmp_imag[i] * forward_real_fft->spectrum[i][1];
  103. delete[] tmp_real;
  104. delete[] tmp_imag;
  105. }
  106. //-----------------------------------------------------------------------------
  107. // GetStaticCentroid() calculates the temporally static energy centroid.
  108. // Basic idea was proposed by H. Kawahara.
  109. //-----------------------------------------------------------------------------
  110. static void GetStaticCentroid(const double *x, int x_length, int fs,
  111. double current_f0, int fft_size, double current_position,
  112. const ForwardRealFFT *forward_real_fft, double *static_centroid) {
  113. double *centroid1 = new double[fft_size / 2 + 1];
  114. double *centroid2 = new double[fft_size / 2 + 1];
  115. GetCentroid(x, x_length, fs, current_f0, fft_size,
  116. current_position - 0.25 / current_f0, forward_real_fft, centroid1);
  117. GetCentroid(x, x_length, fs, current_f0, fft_size,
  118. current_position + 0.25 / current_f0, forward_real_fft, centroid2);
  119. for (int i = 0; i <= fft_size / 2; ++i)
  120. static_centroid[i] = centroid1[i] + centroid2[i];
  121. DCCorrection(static_centroid, current_f0, fs, fft_size, static_centroid);
  122. delete[] centroid1;
  123. delete[] centroid2;
  124. }
  125. //-----------------------------------------------------------------------------
  126. // GetSmoothedPowerSpectrum() calculates the smoothed power spectrum.
  127. // The parameters used for smoothing are optimized in davance.
  128. //-----------------------------------------------------------------------------
  129. static void GetSmoothedPowerSpectrum(const double *x, int x_length, int fs,
  130. double current_f0, int fft_size, double current_position,
  131. const ForwardRealFFT *forward_real_fft, double *smoothed_power_spectrum) {
  132. for (int i = 0; i < fft_size; ++i) forward_real_fft->waveform[i] = 0.0;
  133. GetWindowedWaveform(x, x_length, fs, current_f0,
  134. current_position, world::kHanning, 4.0, forward_real_fft->waveform);
  135. fft_execute(forward_real_fft->forward_fft);
  136. for (int i = 0; i <= fft_size / 2; ++i)
  137. smoothed_power_spectrum[i] =
  138. forward_real_fft->spectrum[i][0] * forward_real_fft->spectrum[i][0] +
  139. forward_real_fft->spectrum[i][1] * forward_real_fft->spectrum[i][1];
  140. DCCorrection(smoothed_power_spectrum, current_f0, fs, fft_size,
  141. smoothed_power_spectrum);
  142. LinearSmoothing(smoothed_power_spectrum, current_f0, fs, fft_size,
  143. smoothed_power_spectrum);
  144. }
  145. //-----------------------------------------------------------------------------
  146. // GetStaticGroupDelay() calculates the temporally static group delay.
  147. // This is the fundamental parameter in D4C.
  148. //-----------------------------------------------------------------------------
  149. static void GetStaticGroupDelay(const double *static_centroid,
  150. const double *smoothed_power_spectrum, int fs, double f0,
  151. int fft_size, double *static_group_delay) {
  152. for (int i = 0; i <= fft_size / 2; ++i)
  153. static_group_delay[i] = static_centroid[i] / smoothed_power_spectrum[i];
  154. LinearSmoothing(static_group_delay, f0 / 2.0, fs, fft_size,
  155. static_group_delay);
  156. double *smoothed_group_delay = new double[fft_size / 2 + 1];
  157. LinearSmoothing(static_group_delay, f0, fs, fft_size,
  158. smoothed_group_delay);
  159. for (int i = 0; i <= fft_size / 2; ++i)
  160. static_group_delay[i] -= smoothed_group_delay[i];
  161. delete[] smoothed_group_delay;
  162. }
  163. //-----------------------------------------------------------------------------
  164. // GetCoarseAperiodicity() calculates the aperiodicity in multiples of 3 kHz.
  165. // The upper limit is given based on the sampling frequency.
  166. //-----------------------------------------------------------------------------
  167. static void GetCoarseAperiodicity(const double *static_group_delay, int fs,
  168. int fft_size, int number_of_aperiodicities, const double *window,
  169. int window_length, const ForwardRealFFT *forward_real_fft,
  170. double *coarse_aperiodicity) {
  171. int boundary =
  172. matlab_round(fft_size * 8.0 / window_length);
  173. int half_window_length = window_length / 2;
  174. for (int i = 0; i < fft_size; ++i) forward_real_fft->waveform[i] = 0.0;
  175. double *power_spectrum = new double[fft_size / 2 + 1];
  176. int center;
  177. for (int i = 0; i < number_of_aperiodicities; ++i) {
  178. center =
  179. static_cast<int>(world::kFrequencyInterval * (i + 1) * fft_size / fs);
  180. for (int j = 0; j <= half_window_length * 2; ++j)
  181. forward_real_fft->waveform[j] =
  182. static_group_delay[center - half_window_length + j] * window[j];
  183. fft_execute(forward_real_fft->forward_fft);
  184. for (int j = 0 ; j <= fft_size / 2; ++j)
  185. power_spectrum[j] =
  186. forward_real_fft->spectrum[j][0] * forward_real_fft->spectrum[j][0] +
  187. forward_real_fft->spectrum[j][1] * forward_real_fft->spectrum[j][1];
  188. std::sort(power_spectrum, power_spectrum + fft_size / 2 + 1);
  189. for (int j = 1 ; j <= fft_size / 2; ++j)
  190. power_spectrum[j] += power_spectrum[j - 1];
  191. coarse_aperiodicity[i] =
  192. 10 * log10(power_spectrum[fft_size / 2 - boundary - 1] /
  193. power_spectrum[fft_size / 2]);
  194. }
  195. delete[] power_spectrum;
  196. }
  197. static double D4CLoveTrainSub(const double *x, int fs, int x_length,
  198. double current_f0, double current_position, int f0_length, int fft_size,
  199. int boundary0, int boundary1, int boundary2,
  200. ForwardRealFFT *forward_real_fft) {
  201. double *power_spectrum = new double[fft_size];
  202. int window_length = matlab_round(1.5 * fs / current_f0) * 2 + 1;
  203. GetWindowedWaveform(x, x_length, fs, current_f0, current_position,
  204. world::kBlackman, 3.0, forward_real_fft->waveform);
  205. for (int i = window_length; i < fft_size; ++i)
  206. forward_real_fft->waveform[i] = 0.0;
  207. fft_execute(forward_real_fft->forward_fft);
  208. for (int i = 0; i <= boundary0; ++i) power_spectrum[i] = 0.0;
  209. for (int i = boundary0 + 1; i < fft_size / 2 + 1; ++i)
  210. power_spectrum[i] =
  211. forward_real_fft->spectrum[i][0] * forward_real_fft->spectrum[i][0] +
  212. forward_real_fft->spectrum[i][1] * forward_real_fft->spectrum[i][1];
  213. for (int i = boundary0; i <= boundary2; ++i)
  214. power_spectrum[i] += +power_spectrum[i - 1];
  215. double aperiodicity0 = power_spectrum[boundary1] / power_spectrum[boundary2];
  216. delete[] power_spectrum;
  217. return aperiodicity0;
  218. }
  219. //-----------------------------------------------------------------------------
  220. // D4CLoveTrain() determines the aperiodicity with VUV detection.
  221. // If a frame was determined as the unvoiced section, aperiodicity is set to
  222. // very high value as the safeguard.
  223. // If it was voiced section, the aperiodicity of 0 Hz is set to -60 dB.
  224. //-----------------------------------------------------------------------------
  225. static void D4CLoveTrain(const double *x, int fs, int x_length,
  226. const double *f0, int f0_length, const double *temporal_positions,
  227. double *aperiodicity0) {
  228. double lowest_f0 = 40.0;
  229. int fft_size = static_cast<int>(pow(2.0, 1.0 +
  230. static_cast<int>(log(3.0 * fs / lowest_f0 + 1) / world::kLog2)));
  231. ForwardRealFFT forward_real_fft = { 0 };
  232. InitializeForwardRealFFT(fft_size, &forward_real_fft);
  233. // Cumulative powers at 100, 4000, 7900 Hz are used for VUV identification.
  234. int boundary0 = static_cast<int>(ceil(100.0 * fft_size / fs));
  235. int boundary1 = static_cast<int>(ceil(4000.0 * fft_size / fs));
  236. int boundary2 = static_cast<int>(ceil(7900.0 * fft_size / fs));
  237. for (int i = 0; i < f0_length; ++i) {
  238. if (f0[i] == 0.0) {
  239. aperiodicity0[i] = 0.0;
  240. continue;
  241. }
  242. aperiodicity0[i] = D4CLoveTrainSub(x, fs, x_length,
  243. MyMaxDouble(f0[i], lowest_f0), temporal_positions[i], f0_length,
  244. fft_size, boundary0, boundary1, boundary2, &forward_real_fft);
  245. }
  246. DestroyForwardRealFFT(&forward_real_fft);
  247. }
  248. //-----------------------------------------------------------------------------
  249. // D4CGeneralBody() calculates a spectral envelope at a temporal
  250. // position. This function is only used in D4C().
  251. // Caution:
  252. // forward_fft is allocated in advance to speed up the processing.
  253. //-----------------------------------------------------------------------------
  254. static void D4CGeneralBody(const double *x, int x_length, int fs,
  255. double current_f0, int fft_size, double current_position,
  256. int number_of_aperiodicities, const double *window, int window_length,
  257. const ForwardRealFFT *forward_real_fft, double *coarse_aperiodicity) {
  258. double *static_centroid = new double[fft_size / 2 + 1];
  259. double *smoothed_power_spectrum = new double[fft_size / 2 + 1];
  260. double *static_group_delay = new double[fft_size / 2 + 1];
  261. GetStaticCentroid(x, x_length, fs, current_f0, fft_size, current_position,
  262. forward_real_fft, static_centroid);
  263. GetSmoothedPowerSpectrum(x, x_length, fs, current_f0, fft_size,
  264. current_position, forward_real_fft, smoothed_power_spectrum);
  265. GetStaticGroupDelay(static_centroid, smoothed_power_spectrum,
  266. fs, current_f0, fft_size, static_group_delay);
  267. GetCoarseAperiodicity(static_group_delay, fs, fft_size,
  268. number_of_aperiodicities, window, window_length, forward_real_fft,
  269. coarse_aperiodicity);
  270. // Revision of the result based on the F0
  271. for (int i = 0; i < number_of_aperiodicities; ++i)
  272. coarse_aperiodicity[i] = MyMinDouble(0.0,
  273. coarse_aperiodicity[i] + (current_f0 - 100) / 50.0);
  274. delete[] static_centroid;
  275. delete[] smoothed_power_spectrum;
  276. delete[] static_group_delay;
  277. }
  278. static void InitializeAperiodicity(int f0_length, int fft_size,
  279. double **aperiodicity) {
  280. for (int i = 0; i < f0_length; ++i)
  281. for (int j = 0; j < fft_size / 2 + 1; ++j)
  282. aperiodicity[i][j] = 1.0 - world::kMySafeGuardMinimum;
  283. }
  284. static void GetAperiodicity(const double *coarse_frequency_axis,
  285. const double *coarse_aperiodicity, int number_of_aperiodicities,
  286. const double *frequency_axis, int fft_size, double *aperiodicity) {
  287. interp1(coarse_frequency_axis, coarse_aperiodicity,
  288. number_of_aperiodicities + 2, frequency_axis, fft_size / 2 + 1,
  289. aperiodicity);
  290. for (int i = 0; i <= fft_size / 2; ++i)
  291. aperiodicity[i] = pow(10.0, aperiodicity[i] / 20.0);
  292. }
  293. } // namespace
  294. void D4C(const double *x, int x_length, int fs,
  295. const double *temporal_positions, const double *f0, int f0_length,
  296. int fft_size, const D4COption *option, double **aperiodicity) {
  297. randn_reseed();
  298. InitializeAperiodicity(f0_length, fft_size, aperiodicity);
  299. int fft_size_d4c = static_cast<int>(pow(2.0, 1.0 +
  300. static_cast<int>(log(4.0 * fs / world::kFloorF0D4C + 1) /
  301. world::kLog2)));
  302. ForwardRealFFT forward_real_fft = {0};
  303. InitializeForwardRealFFT(fft_size_d4c, &forward_real_fft);
  304. int number_of_aperiodicities =
  305. static_cast<int>(MyMinDouble(world::kUpperLimit, fs / 2.0 -
  306. world::kFrequencyInterval) / world::kFrequencyInterval);
  307. // Since the window function is common in D4CGeneralBody(),
  308. // it is designed here to speed up.
  309. int window_length =
  310. static_cast<int>(world::kFrequencyInterval * fft_size_d4c / fs) * 2 + 1;
  311. double *window = new double[window_length];
  312. NuttallWindow(window_length, window);
  313. // D4C Love Train (Aperiodicity of 0 Hz is given by the different algorithm)
  314. double *aperiodicity0 = new double[f0_length];
  315. D4CLoveTrain(x, fs, x_length, f0, f0_length, temporal_positions,
  316. aperiodicity0);
  317. double *coarse_aperiodicity = new double[number_of_aperiodicities + 2];
  318. coarse_aperiodicity[0] = -60.0;
  319. coarse_aperiodicity[number_of_aperiodicities + 1] =
  320. -world::kMySafeGuardMinimum;
  321. double *coarse_frequency_axis = new double[number_of_aperiodicities + 2];
  322. for (int i = 0; i <= number_of_aperiodicities; ++i)
  323. coarse_frequency_axis[i] = i * world::kFrequencyInterval;
  324. coarse_frequency_axis[number_of_aperiodicities + 1] = fs / 2.0;
  325. double *frequency_axis = new double[fft_size / 2 + 1];
  326. for (int i = 0; i <= fft_size / 2; ++i)
  327. frequency_axis[i] = static_cast<double>(i) * fs / fft_size;
  328. for (int i = 0; i < f0_length; ++i) {
  329. if (f0[i] == 0 || aperiodicity0[i] <= option->threshold) continue;
  330. D4CGeneralBody(x, x_length, fs, MyMaxDouble(world::kFloorF0D4C, f0[i]),
  331. fft_size_d4c, temporal_positions[i], number_of_aperiodicities, window,
  332. window_length, &forward_real_fft, &coarse_aperiodicity[1]);
  333. // Linear interpolation to convert the coarse aperiodicity into its
  334. // spectral representation.
  335. GetAperiodicity(coarse_frequency_axis, coarse_aperiodicity,
  336. number_of_aperiodicities, frequency_axis, fft_size, aperiodicity[i]);
  337. }
  338. DestroyForwardRealFFT(&forward_real_fft);
  339. delete[] aperiodicity0;
  340. delete[] coarse_frequency_axis;
  341. delete[] coarse_aperiodicity;
  342. delete[] window;
  343. delete[] frequency_axis;
  344. }
  345. void InitializeD4COption(D4COption *option) {
  346. option->threshold = world::kThreshold;
  347. }