synthesis.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. //-----------------------------------------------------------------------------
  2. // Copyright 2012 Masanori Morise
  3. // Author: mmorise [at] yamanashi.ac.jp (Masanori Morise)
  4. // Last update: 2018/12/13
  5. //
  6. // Voice synthesis based on f0, spectrogram and aperiodicity.
  7. // forward_real_fft, inverse_real_fft and minimum_phase are used to speed up.
  8. //-----------------------------------------------------------------------------
  9. #include "world/synthesis.h"
  10. #include <math.h>
  11. #include "world/common.h"
  12. #include "world/constantnumbers.h"
  13. #include "world/matlabfunctions.h"
  14. #include <assert.h>
  15. namespace {
  16. static void GetNoiseSpectrum(int noise_size, int fft_size,
  17. const ForwardRealFFT *forward_real_fft) {
  18. double average = 0.0;
  19. for (int i = 0; i < noise_size; ++i) {
  20. forward_real_fft->waveform[i] = randn();
  21. average += forward_real_fft->waveform[i];
  22. }
  23. average /= noise_size;
  24. for (int i = 0; i < noise_size; ++i)
  25. forward_real_fft->waveform[i] -= average;
  26. for (int i = noise_size; i < fft_size; ++i)
  27. forward_real_fft->waveform[i] = 0.0;
  28. fft_execute(forward_real_fft->forward_fft);
  29. }
  30. //-----------------------------------------------------------------------------
  31. // GetAperiodicResponse() calculates an aperiodic response.
  32. //-----------------------------------------------------------------------------
  33. static void GetAperiodicResponse(int noise_size, int fft_size,
  34. const double *spectrum, const double *aperiodic_ratio, double current_vuv,
  35. const ForwardRealFFT *forward_real_fft,
  36. const InverseRealFFT *inverse_real_fft,
  37. const MinimumPhaseAnalysis *minimum_phase, double *aperiodic_response) {
  38. GetNoiseSpectrum(noise_size, fft_size, forward_real_fft);
  39. if (current_vuv != 0.0)
  40. for (int i = 0; i <= minimum_phase->fft_size / 2; ++i)
  41. minimum_phase->log_spectrum[i] =
  42. log(spectrum[i] * aperiodic_ratio[i]) / 2.0;
  43. else
  44. for (int i = 0; i <= minimum_phase->fft_size / 2; ++i)
  45. minimum_phase->log_spectrum[i] = log(spectrum[i]) / 2.0;
  46. GetMinimumPhaseSpectrum(minimum_phase);
  47. for (int i = 0; i <= fft_size / 2; ++i) {
  48. inverse_real_fft->spectrum[i][0] =
  49. minimum_phase->minimum_phase_spectrum[i][0] *
  50. forward_real_fft->spectrum[i][0] -
  51. minimum_phase->minimum_phase_spectrum[i][1] *
  52. forward_real_fft->spectrum[i][1];
  53. inverse_real_fft->spectrum[i][1] =
  54. minimum_phase->minimum_phase_spectrum[i][0] *
  55. forward_real_fft->spectrum[i][1] +
  56. minimum_phase->minimum_phase_spectrum[i][1] *
  57. forward_real_fft->spectrum[i][0];
  58. }
  59. fft_execute(inverse_real_fft->inverse_fft);
  60. fftshift(inverse_real_fft->waveform, fft_size, aperiodic_response);
  61. }
  62. //-----------------------------------------------------------------------------
  63. // RemoveDCComponent()
  64. //-----------------------------------------------------------------------------
  65. static void RemoveDCComponent(const double *periodic_response, int fft_size,
  66. const double *dc_remover, double *new_periodic_response) {
  67. double dc_component = 0.0;
  68. for (int i = fft_size / 2; i < fft_size; ++i)
  69. dc_component += periodic_response[i];
  70. for (int i = 0; i < fft_size / 2; ++i)
  71. new_periodic_response[i] = -dc_component * dc_remover[i];
  72. for (int i = fft_size / 2; i < fft_size; ++i)
  73. new_periodic_response[i] -= dc_component * dc_remover[i];
  74. }
  75. //-----------------------------------------------------------------------------
  76. // GetSpectrumWithFractionalTimeShift() calculates a periodic spectrum with
  77. // the fractional time shift under 1/fs.
  78. //-----------------------------------------------------------------------------
  79. static void GetSpectrumWithFractionalTimeShift(int fft_size,
  80. double coefficient, const InverseRealFFT *inverse_real_fft) {
  81. double re, im, re2, im2;
  82. for (int i = 0; i <= fft_size / 2; ++i) {
  83. re = inverse_real_fft->spectrum[i][0];
  84. im = inverse_real_fft->spectrum[i][1];
  85. re2 = cos(coefficient * i);
  86. im2 = sqrt(1.0 - re2 * re2); // sin(pshift)
  87. inverse_real_fft->spectrum[i][0] = re * re2 + im * im2;
  88. inverse_real_fft->spectrum[i][1] = im * re2 - re * im2;
  89. }
  90. }
  91. //-----------------------------------------------------------------------------
  92. // GetPeriodicResponse() calculates a periodic response.
  93. //-----------------------------------------------------------------------------
  94. static void GetPeriodicResponse(int fft_size, const double *spectrum,
  95. const double *aperiodic_ratio, double current_vuv,
  96. const InverseRealFFT *inverse_real_fft,
  97. const MinimumPhaseAnalysis *minimum_phase, const double *dc_remover,
  98. double fractional_time_shift, int fs, double *periodic_response) {
  99. if (current_vuv <= 0.5 || aperiodic_ratio[0] > 0.999) {
  100. for (int i = 0; i < fft_size; ++i) periodic_response[i] = 0.0;
  101. return;
  102. }
  103. for (int i = 0; i <= minimum_phase->fft_size / 2; ++i)
  104. minimum_phase->log_spectrum[i] =
  105. log(spectrum[i] * (1.0 - aperiodic_ratio[i]) +
  106. world::kMySafeGuardMinimum) / 2.0;
  107. GetMinimumPhaseSpectrum(minimum_phase);
  108. for (int i = 0; i <= fft_size / 2; ++i) {
  109. inverse_real_fft->spectrum[i][0] =
  110. minimum_phase->minimum_phase_spectrum[i][0];
  111. inverse_real_fft->spectrum[i][1] =
  112. minimum_phase->minimum_phase_spectrum[i][1];
  113. }
  114. // apply fractional time delay of fractional_time_shift seconds
  115. // using linear phase shift
  116. double coefficient =
  117. 2.0 * world::kPi * fractional_time_shift * fs / fft_size;
  118. GetSpectrumWithFractionalTimeShift(fft_size, coefficient, inverse_real_fft);
  119. fft_execute(inverse_real_fft->inverse_fft);
  120. fftshift(inverse_real_fft->waveform, fft_size, periodic_response);
  121. RemoveDCComponent(periodic_response, fft_size, dc_remover,
  122. periodic_response);
  123. }
  124. static void GetSpectralEnvelope(double current_time, double frame_period,
  125. int f0_length, const double * const *spectrogram, int fft_size,
  126. double *spectral_envelope) {
  127. int current_frame_floor = MyMinInt(f0_length - 1,
  128. static_cast<int>(floor(current_time / frame_period)));
  129. int current_frame_ceil = MyMinInt(f0_length - 1,
  130. static_cast<int>(ceil(current_time / frame_period)));
  131. double interpolation = current_time / frame_period - current_frame_floor;
  132. if (current_frame_floor == current_frame_ceil)
  133. for (int i = 0; i <= fft_size / 2; ++i)
  134. spectral_envelope[i] = fabs(spectrogram[current_frame_floor][i]);
  135. else
  136. for (int i = 0; i <= fft_size / 2; ++i)
  137. spectral_envelope[i] =
  138. (1.0 - interpolation) * fabs(spectrogram[current_frame_floor][i]) +
  139. interpolation * fabs(spectrogram[current_frame_ceil][i]);
  140. }
  141. static void GetAperiodicRatio(double current_time, double frame_period,
  142. int f0_length, const double * const *aperiodicity, int fft_size,
  143. double *aperiodic_spectrum) {
  144. int current_frame_floor = MyMinInt(f0_length - 1,
  145. static_cast<int>(floor(current_time / frame_period)));
  146. int current_frame_ceil = MyMinInt(f0_length - 1,
  147. static_cast<int>(ceil(current_time / frame_period)));
  148. double interpolation = current_time / frame_period - current_frame_floor;
  149. if (current_frame_floor == current_frame_ceil)
  150. for (int i = 0; i <= fft_size / 2; ++i)
  151. aperiodic_spectrum[i] =
  152. pow(GetSafeAperiodicity(aperiodicity[current_frame_floor][i]), 2.0);
  153. else
  154. for (int i = 0; i <= fft_size / 2; ++i)
  155. aperiodic_spectrum[i] = pow((1.0 - interpolation) *
  156. GetSafeAperiodicity(aperiodicity[current_frame_floor][i]) +
  157. interpolation *
  158. GetSafeAperiodicity(aperiodicity[current_frame_ceil][i]), 2.0);
  159. }
  160. //-----------------------------------------------------------------------------
  161. // GetOneFrameSegment() calculates a periodic and aperiodic response at a time.
  162. //-----------------------------------------------------------------------------
  163. static void GetOneFrameSegment(double current_vuv, int noise_size,
  164. const double * const *spectrogram, int fft_size,
  165. const double * const *aperiodicity, int f0_length, double frame_period,
  166. double current_time, double fractional_time_shift, int fs,
  167. const ForwardRealFFT *forward_real_fft,
  168. const InverseRealFFT *inverse_real_fft,
  169. const MinimumPhaseAnalysis *minimum_phase, const double *dc_remover,
  170. double *response) {
  171. double *aperiodic_response = new double[fft_size];
  172. double *periodic_response = new double[fft_size];
  173. double *spectral_envelope = new double[fft_size];
  174. double *aperiodic_ratio = new double[fft_size];
  175. GetSpectralEnvelope(current_time, frame_period, f0_length, spectrogram,
  176. fft_size, spectral_envelope);
  177. GetAperiodicRatio(current_time, frame_period, f0_length, aperiodicity,
  178. fft_size, aperiodic_ratio);
  179. // Synthesis of the periodic response
  180. GetPeriodicResponse(fft_size, spectral_envelope, aperiodic_ratio,
  181. current_vuv, inverse_real_fft, minimum_phase, dc_remover,
  182. fractional_time_shift, fs, periodic_response);
  183. // Synthesis of the aperiodic response
  184. GetAperiodicResponse(noise_size, fft_size, spectral_envelope,
  185. aperiodic_ratio, current_vuv, forward_real_fft,
  186. inverse_real_fft, minimum_phase, aperiodic_response);
  187. double sqrt_noise_size = sqrt(static_cast<double>(noise_size));
  188. for (int i = 0; i < fft_size; ++i)
  189. response[i] =
  190. (periodic_response[i] * sqrt_noise_size + aperiodic_response[i]) /
  191. fft_size;
  192. delete[] spectral_envelope;
  193. delete[] aperiodic_ratio;
  194. delete[] periodic_response;
  195. delete[] aperiodic_response;
  196. }
  197. static void GetOneFrameSegmentMBR(int index, const double* f0,
  198. const double * const *spectrogram, int fft_size,
  199. const double * const *aperiodicity, int f0_length, double frame_period,
  200. //double current_time, double fractional_time_shift, int fs,
  201. int fs,
  202. const ForwardRealFFT *forward_real_fft,
  203. const InverseRealFFT *inverse_real_fft,
  204. const MinimumPhaseAnalysis *minimum_phase, const double *dc_remover,
  205. double *response)
  206. {
  207. auto spectral_envelope = spectrogram[index];
  208. auto aperiodic_ratio = aperiodicity[index];
  209. double *aperiodic_response = new double[fft_size];
  210. double *periodic_response = new double[fft_size];
  211. double current_f0=f0[index];
  212. double fractional_time_shift=0;
  213. int noise_size=fs/500;
  214. if(current_f0>0)
  215. noise_size = fs/current_f0;
  216. if(noise_size>fft_size/2) noise_size=fft_size/2;
  217. //printf("GetOneFrameSegmentMBR noise_size=%i current_f0=%f\n",noise_size,current_f0);
  218. if(noise_size==0) return;
  219. // Synthesis of the periodic response
  220. GetPeriodicResponse(fft_size, spectral_envelope, aperiodic_ratio,
  221. current_f0, inverse_real_fft, minimum_phase, dc_remover,
  222. fractional_time_shift, fs, periodic_response);
  223. // Synthesis of the aperiodic response
  224. GetAperiodicResponse(noise_size, fft_size, spectral_envelope,
  225. aperiodic_ratio, current_f0, forward_real_fft,
  226. inverse_real_fft, minimum_phase, aperiodic_response);
  227. double sqrt_noise_size = sqrt(static_cast<double>(noise_size));
  228. for (int i = 0; i < fft_size; ++i)
  229. response[i] =
  230. (periodic_response[i] * sqrt_noise_size + aperiodic_response[i]) /
  231. fft_size;
  232. //response[0]=1.0;
  233. delete[] periodic_response;
  234. delete[] aperiodic_response;
  235. }
  236. static void GetTemporalParametersForTimeBase(const double *f0, int f0_length,
  237. int fs, int y_length, double frame_period, double lowest_f0,
  238. double *time_axis, double *coarse_time_axis, double *coarse_f0,
  239. double *coarse_vuv) {
  240. for (int i = 0; i < y_length; ++i)
  241. time_axis[i] = i / static_cast<double>(fs);
  242. // the array 'coarse_time_axis' is supposed to have 'f0_length + 1' positions
  243. for (int i = 0; i < f0_length; ++i) {
  244. coarse_time_axis[i] = i * frame_period;
  245. coarse_f0[i] = f0[i] < lowest_f0 ? 0.0 : f0[i];
  246. coarse_vuv[i] = coarse_f0[i] == 0.0 ? 0.0 : 1.0;
  247. }
  248. coarse_time_axis[f0_length] = f0_length * frame_period;
  249. coarse_f0[f0_length] = coarse_f0[f0_length - 1] * 2 -
  250. coarse_f0[f0_length - 2];
  251. coarse_vuv[f0_length] = coarse_vuv[f0_length - 1] * 2 -
  252. coarse_vuv[f0_length - 2];
  253. }
  254. static int GetPulseLocationsForTimeBase(const double *interpolated_f0,
  255. const double *time_axis, int y_length, int fs, double *pulse_locations,
  256. int *pulse_locations_index, double *pulse_locations_time_shift) {
  257. double *total_phase = new double[y_length];
  258. double *wrap_phase = new double[y_length];
  259. double *wrap_phase_abs = new double[y_length - 1];
  260. total_phase[0] = 2.0 * world::kPi * interpolated_f0[0] / fs;
  261. wrap_phase[0] = fmod(total_phase[0], 2.0 * world::kPi);
  262. for (int i = 1; i < y_length; ++i) {
  263. total_phase[i] = total_phase[i - 1] +
  264. 2.0 * world::kPi * interpolated_f0[i] / fs;
  265. wrap_phase[i] = fmod(total_phase[i], 2.0 * world::kPi);
  266. wrap_phase_abs[i - 1] = fabs(wrap_phase[i] - wrap_phase[i - 1]);
  267. }
  268. int number_of_pulses = 0;
  269. for (int i = 0; i < y_length - 1; ++i) {
  270. if (wrap_phase_abs[i] > world::kPi) {
  271. pulse_locations[number_of_pulses] = time_axis[i];
  272. pulse_locations_index[number_of_pulses] = i;
  273. // calculate the time shift in seconds between exact fractional pulse
  274. // position and the integer pulse position (sample i)
  275. // as we don't have access to the exact pulse position, we infer it
  276. // from the point between sample i and sample i + 1 where the
  277. // accummulated phase cross a multiple of 2pi
  278. // this point is found by solving y1 + x * (y2 - y1) = 0 for x, where y1
  279. // and y2 are the phases corresponding to sample i and i + 1, offset so
  280. // they cross zero; x >= 0
  281. double y1 = wrap_phase[i] - 2.0 * world::kPi;
  282. double y2 = wrap_phase[i + 1];
  283. double x = -y1 / (y2 - y1);
  284. pulse_locations_time_shift[number_of_pulses] = x / fs;
  285. ++number_of_pulses;
  286. }
  287. }
  288. delete[] wrap_phase_abs;
  289. delete[] wrap_phase;
  290. delete[] total_phase;
  291. return number_of_pulses;
  292. }
  293. static int GetTimeBase(const double *f0, int f0_length, int fs,
  294. double frame_period, int y_length, double lowest_f0,
  295. double *pulse_locations, int *pulse_locations_index,
  296. double *pulse_locations_time_shift, double *interpolated_vuv) {
  297. double *time_axis = new double[y_length];
  298. double *coarse_time_axis = new double[f0_length + 1];
  299. double *coarse_f0 = new double[f0_length + 1];
  300. double *coarse_vuv = new double[f0_length + 1];
  301. GetTemporalParametersForTimeBase(f0, f0_length, fs, y_length, frame_period,
  302. lowest_f0, time_axis, coarse_time_axis, coarse_f0, coarse_vuv);
  303. double *interpolated_f0 = new double[y_length];
  304. interp1(coarse_time_axis, coarse_f0, f0_length + 1,
  305. time_axis, y_length, interpolated_f0);
  306. interp1(coarse_time_axis, coarse_vuv, f0_length + 1,
  307. time_axis, y_length, interpolated_vuv);
  308. for (int i = 0; i < y_length; ++i) {
  309. interpolated_vuv[i] = interpolated_vuv[i] > 0.5 ? 1.0 : 0.0;
  310. interpolated_f0[i] =
  311. interpolated_vuv[i] == 0.0 ? world::kDefaultF0 : interpolated_f0[i];
  312. }
  313. int number_of_pulses = GetPulseLocationsForTimeBase(interpolated_f0,
  314. time_axis, y_length, fs, pulse_locations, pulse_locations_index,
  315. pulse_locations_time_shift);
  316. delete[] coarse_vuv;
  317. delete[] coarse_f0;
  318. delete[] coarse_time_axis;
  319. delete[] time_axis;
  320. delete[] interpolated_f0;
  321. return number_of_pulses;
  322. }
  323. static void GetDCRemover(int fft_size, double *dc_remover) {
  324. double dc_component = 0.0;
  325. for (int i = 0; i < fft_size / 2; ++i) {
  326. dc_remover[i] = 0.5 -
  327. 0.5 * cos(2.0 * world::kPi * (i + 1.0) / (1.0 + fft_size));
  328. dc_remover[fft_size - i - 1] = dc_remover[i];
  329. dc_component += dc_remover[i] * 2.0;
  330. }
  331. for (int i = 0; i < fft_size / 2; ++i) {
  332. dc_remover[i] /= dc_component;
  333. dc_remover[fft_size - i - 1] = dc_remover[i];
  334. }
  335. }
  336. } // namespace
  337. void Synthesis(const double *f0, int f0_length,
  338. const double * const *spectrogram, const double * const *aperiodicity,
  339. int fft_size, double frame_period, int fs, int y_length, double *y) {
  340. randn_reseed();
  341. double *impulse_response = new double[fft_size];
  342. for (int i = 0; i < y_length; ++i) y[i] = 0.0;
  343. MinimumPhaseAnalysis minimum_phase = {0};
  344. InitializeMinimumPhaseAnalysis(fft_size, &minimum_phase);
  345. InverseRealFFT inverse_real_fft = {0};
  346. InitializeInverseRealFFT(fft_size, &inverse_real_fft);
  347. ForwardRealFFT forward_real_fft = {0};
  348. InitializeForwardRealFFT(fft_size, &forward_real_fft);
  349. double *pulse_locations = new double[y_length];
  350. int *pulse_locations_index = new int[y_length];
  351. double *pulse_locations_time_shift = new double[y_length];
  352. double *interpolated_vuv = new double[y_length];
  353. int number_of_pulses = GetTimeBase(f0, f0_length, fs, frame_period / 1000.0,
  354. y_length, fs / fft_size + 1.0, pulse_locations, pulse_locations_index,
  355. pulse_locations_time_shift, interpolated_vuv);
  356. double *dc_remover = new double[fft_size];
  357. GetDCRemover(fft_size, dc_remover);
  358. frame_period /= 1000.0;
  359. int noise_size;
  360. int index, offset, lower_limit, upper_limit;
  361. for (int i = 0; i < number_of_pulses; ++i) {
  362. noise_size = pulse_locations_index[MyMinInt(number_of_pulses - 1, i + 1)] -
  363. pulse_locations_index[i];
  364. GetOneFrameSegment(interpolated_vuv[pulse_locations_index[i]], noise_size,
  365. spectrogram, fft_size, aperiodicity, f0_length, frame_period,
  366. pulse_locations[i], pulse_locations_time_shift[i], fs,
  367. &forward_real_fft, &inverse_real_fft, &minimum_phase, dc_remover,
  368. impulse_response);
  369. offset = pulse_locations_index[i] - fft_size / 2 + 1;
  370. lower_limit = MyMaxInt(0, -offset);
  371. upper_limit = MyMinInt(fft_size, y_length - offset);
  372. for (int j = lower_limit; j < upper_limit; ++j) {
  373. index = j + offset;
  374. y[index] += impulse_response[j];
  375. }
  376. }
  377. delete[] dc_remover;
  378. delete[] pulse_locations;
  379. delete[] pulse_locations_index;
  380. delete[] pulse_locations_time_shift;
  381. delete[] interpolated_vuv;
  382. DestroyMinimumPhaseAnalysis(&minimum_phase);
  383. DestroyInverseRealFFT(&inverse_real_fft);
  384. DestroyForwardRealFFT(&forward_real_fft);
  385. delete[] impulse_response;
  386. }
  387. void SynthesisMBR(const double *f0, int f0_length,
  388. const double * const *spectrogram, const double * const *aperiodicity,
  389. int fft_size, double frame_period, int fs ,int y_length, double *y) {
  390. randn_reseed();
  391. for (int i = 0; i < y_length; ++i) y[i] = 0.0;
  392. MinimumPhaseAnalysis minimum_phase = {0};
  393. InitializeMinimumPhaseAnalysis(fft_size, &minimum_phase);
  394. InverseRealFFT inverse_real_fft = {0};
  395. InitializeInverseRealFFT(fft_size, &inverse_real_fft);
  396. ForwardRealFFT forward_real_fft = {0};
  397. InitializeForwardRealFFT(fft_size, &forward_real_fft);
  398. double *dc_remover = new double[fft_size];
  399. GetDCRemover(fft_size, dc_remover);
  400. assert(y_length == fft_size*f0_length);
  401. for(int i=0;i<f0_length;i++)
  402. {
  403. GetOneFrameSegmentMBR(i,f0,
  404. //interpolated_vuv[pulse_locations_index[i]], noise_size,
  405. spectrogram, fft_size, aperiodicity, f0_length, frame_period,
  406. //pulse_locations[i], pulse_locations_time_shift[i], fs,
  407. fs,
  408. &forward_real_fft, &inverse_real_fft, &minimum_phase, dc_remover,
  409. &y[fft_size*i]);
  410. }
  411. delete[] dc_remover;
  412. DestroyMinimumPhaseAnalysis(&minimum_phase);
  413. DestroyInverseRealFFT(&inverse_real_fft);
  414. DestroyForwardRealFFT(&forward_real_fft);
  415. }