synthesis.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. /*
  2. *
  3. * Synthesis API for WORLD 0.0.1 by M. Morise
  4. *
  5. * synthesis.cpp
  6. * (c) M. Morise 2010-
  7. * edit and comment by HAL, kbinani
  8. *
  9. * This file is a part of WORLD system.
  10. * WORLD needs FFTW. Please install FFTW to use these files.
  11. * FFTW is here; http://www.fftw.org/
  12. *
  13. * notice: this comment is added by HAL.
  14. * Original files are on the web page below;
  15. * http://www.aspl.is.ritsumei.ac.jp/morise/world/
  16. * (this file is from WORLD 0.0.1)
  17. *
  18. * synthesis.cpp includes a set of functions
  19. * that supports synthesis on WORLD 0.0.1.
  20. * Notice that aperiodicity of 0.0.1 is much different
  21. * from that of 0.0.4. Please use platinum function
  22. * for synthesis function.
  23. *
  24. */
  25. #include "world.h"
  26. #include <stdio.h> // for debug
  27. #include <stdlib.h>
  28. // spectrum, cepstrumは毎回malloc, freeするのが面倒だから.
  29. void getOneFrameSegment(double *f0, int tLen, double **specgram, double **aperiodicity, int fftl, double framePeriod, double currentTime, int fs, double defaultF0,
  30. fftw_complex *spectrum, fftw_complex *cepstrum,
  31. double *response, int xLen);
  32. // xorshift法と中心極限定理との組み合わせ
  33. /*double randn(void)
  34. {
  35. static unsigned int x = 123456789;
  36. static unsigned int y = 362436069;
  37. static unsigned int z = 521288629;
  38. static unsigned int w = 88675123;
  39. unsigned int t;
  40. t = x ^ (x << 11);
  41. x = y; y = z; z = w;
  42. int i;
  43. double ans;
  44. unsigned int tmp;
  45. tmp = 0;
  46. ans = 0.0;
  47. for(i = 0;i < 12;i++)
  48. {
  49. t = x ^ (x << 11);
  50. x = y; y = z; z = w;
  51. w = (w ^ (w >> 19)) ^ (t ^ (t >> 8));
  52. tmp += w>>4;
  53. }
  54. ans = (double)tmp / 268435456.0 - 6.0;
  55. return ans;
  56. }*/
  57. void getMinimumPhaseSpectrum(double *inputSpec, fftw_complex *spectrum, fftw_complex *cepstrum, int fftl, fftw_plan forwardFFT, fftw_plan inverseFFT)
  58. {
  59. int i;
  60. double real, imag;
  61. // 値を取り出す
  62. for(i = 0;i <= fftl/2;i++)
  63. {
  64. // 加算は暫定処置
  65. spectrum[i][0] = log(inputSpec[i] + 1.0e-17)/2.0;
  66. spectrum[i][1] = 0.0;
  67. }
  68. for(;i < fftl;i++)
  69. {
  70. spectrum[i][0] = spectrum[fftl-i][0];
  71. spectrum[i][1] = 0.0;
  72. }
  73. fftw_execute(forwardFFT);
  74. for(i = 1;i < fftl/2;i++)
  75. {
  76. cepstrum[i][0] = 0.0;
  77. cepstrum[i][1] = 0.0;
  78. }
  79. for(;i < fftl;i++)
  80. {
  81. cepstrum[i][0] *= 2.0;
  82. cepstrum[i][1] *= 2.0;
  83. }
  84. fftw_execute(inverseFFT);
  85. for(i = 0;i < fftl;i++)
  86. {
  87. real = exp(spectrum[i][0]/(double)fftl)*cos(spectrum[i][1]/(double)fftl);
  88. imag = exp(spectrum[i][0]/(double)fftl)*sin(spectrum[i][1]/(double)fftl);
  89. spectrum[i][0] = real;
  90. spectrum[i][1] = imag;
  91. }
  92. }
  93. // 特定時刻の応答を取得する.
  94. void getOneFrameSegment(double *f0, int tLen, double **specgram, double **aperiodicity, int fftl, double framePeriod, double currentTime, int fs, double defaultF0,
  95. fftw_complex *spectrum, fftw_complex *cepstrum,
  96. double *response, int xLen)
  97. {
  98. int i, offset, noiseSize;
  99. double real, imag, tmp;
  100. fftw_plan inverseFFT_RP; // FFTセット
  101. fftw_plan inverseFFT_RA; // FFTセット
  102. fftw_plan forwardFFT_R; // FFTセット
  103. fftw_plan minForward, minInverse;
  104. int currentFrame, currentPosition;
  105. double *periodicSpec, *aperiodicSpec, *aperiodicRatio;
  106. periodicSpec = (double *)malloc(sizeof(double)* fftl);
  107. aperiodicSpec = (double *)malloc(sizeof(double)* fftl);
  108. aperiodicRatio = (double *)malloc(sizeof(double)* fftl);
  109. fftw_complex *noiseSpec;
  110. noiseSpec = (fftw_complex *)malloc(sizeof(fftw_complex)* fftl);
  111. double *aperiodicResponse, *periodicResponse;
  112. aperiodicResponse = (double *)malloc(sizeof(double)* fftl);
  113. periodicResponse = (double *)malloc(sizeof(double)* fftl);
  114. #ifdef STND_MULTI_THREAD
  115. if( hFFTWMutex ){
  116. hFFTWMutex->lock();
  117. }
  118. #endif
  119. forwardFFT_R = fftw_plan_dft_r2c_1d(fftl, aperiodicResponse, noiseSpec, FFTW_ESTIMATE);
  120. inverseFFT_RP = fftw_plan_dft_c2r_1d(fftl, spectrum, periodicResponse , FFTW_ESTIMATE);
  121. inverseFFT_RA = fftw_plan_dft_c2r_1d(fftl, spectrum, aperiodicResponse, FFTW_ESTIMATE);
  122. minForward = fftw_plan_dft_1d(fftl, spectrum, cepstrum, FFTW_FORWARD, FFTW_ESTIMATE);
  123. minInverse = fftw_plan_dft_1d(fftl, cepstrum, spectrum, FFTW_BACKWARD, FFTW_ESTIMATE);
  124. #ifdef STND_MULTI_THREAD
  125. if( hFFTWMutex ){
  126. hFFTWMutex->unlock();
  127. }
  128. #endif
  129. currentFrame = (int)(currentTime/(framePeriod/1000.0) + 0.5);
  130. currentPosition = (int)(currentTime*(double)fs);
  131. tmp = currentTime + 1.0/(f0[currentFrame] == 0.0 ? defaultF0 : f0[currentFrame]);
  132. noiseSize = (int)(tmp*(double)fs) - currentPosition;
  133. if(f0[currentFrame] == 0.0)
  134. { // 無声音として扱う(波形の再利用)
  135. offset = currentPosition - (int)((double)currentFrame*framePeriod/1000.0*(double)fs);
  136. if(offset < 0)
  137. {
  138. currentFrame-=1;
  139. offset = currentPosition - (int)((double)currentFrame*framePeriod/1000.0*(double)fs);
  140. }
  141. for(i = 0;i < (int)((double)fs/defaultF0);i++)
  142. {
  143. if(i+currentPosition >= xLen || i+offset > fftl/2) break;
  144. response[i] = specgram[currentFrame][i+offset];
  145. }
  146. }
  147. else
  148. { // 有声音として扱う
  149. // 非周期性指標の計算
  150. calculateAperiodicity(aperiodicity[currentFrame], fftl, fs, aperiodicRatio);
  151. // 値を取り出す
  152. for(i = 0;i <= fftl/2;i++)
  153. {
  154. periodicSpec[i] = specgram[currentFrame][i] * aperiodicRatio[i];
  155. }
  156. getMinimumPhaseSpectrum(periodicSpec, spectrum, cepstrum, fftl, minForward, minInverse);
  157. fftw_execute(inverseFFT_RP);
  158. // ここから非周期音の合成
  159. for(i = 0;i <= fftl/2;i++)
  160. {
  161. aperiodicSpec[i] = specgram[currentFrame][i] * ((1-aperiodicRatio[i])+0.000000000000001);
  162. }
  163. getMinimumPhaseSpectrum(aperiodicSpec, spectrum, cepstrum, fftl, minForward, minInverse);
  164. for(i = 0;i < noiseSize;i++)
  165. aperiodicResponse[i] = randn();
  166. for(;i < fftl;i++)
  167. aperiodicResponse[i] = 0.0;
  168. fftw_execute(forwardFFT_R);
  169. for(i = 0;i <= fftl/2;i++)
  170. {
  171. real = spectrum[i][0]*noiseSpec[i][0] - spectrum[i][1]*noiseSpec[i][1];
  172. imag = spectrum[i][0]*noiseSpec[i][1] + spectrum[i][1]*noiseSpec[i][0];
  173. spectrum[i][0] = real;
  174. spectrum[i][1] = imag;
  175. }
  176. fftw_execute(inverseFFT_RA);
  177. // 1.633はエネルギー調整用のマジックナンバー
  178. // 基本周期の3倍のハニング窓で波形を切り出した際のエネルギー損失を補償している.
  179. // *8.0は完全にマジックナンバー
  180. // ここは,将来リリースでの調整が必須です.
  181. for(i = 0;i < fftl;i++)
  182. {
  183. //response[i] = (periodicResponse[i]*sqrt((double)noiseSize) + aperiodicResponse[i])*1.633/(double)fftl*12.0/sqrt((double)noiseSize);
  184. // IFFT->FFT の係数による補正は入力波形の正規化の段階で行っている. byHAL
  185. response[i] = (periodicResponse[i]*sqrt((double)noiseSize) + aperiodicResponse[i])*1.633*12.0/sqrt((double)noiseSize);
  186. // response[i] = (periodicResponse[i]/sqrt((double)noiseSize))*1.633/(double)fftl*12.0;
  187. // aperiodicResponse[i]/(double)noiseSize;
  188. // response[i] = aperiodicResponse[i]/(double)fftl/sqrt((double)noiseSize);
  189. }
  190. }
  191. #ifdef STND_MULTI_THREAD
  192. if( hFFTWMutex ){
  193. hFFTWMutex->lock();
  194. }
  195. #endif
  196. fftw_destroy_plan(inverseFFT_RP);
  197. fftw_destroy_plan(inverseFFT_RA);
  198. fftw_destroy_plan(forwardFFT_R);
  199. fftw_destroy_plan(minForward);
  200. fftw_destroy_plan(minInverse);
  201. #ifdef STND_MULTI_THREAD
  202. if( hFFTWMutex ){
  203. hFFTWMutex->unlock();
  204. }
  205. #endif
  206. free(periodicSpec);
  207. free(aperiodicSpec);
  208. free(periodicResponse);
  209. free(aperiodicResponse);
  210. free(aperiodicRatio);
  211. free(noiseSpec);
  212. }
  213. void synthesis(double *f0, int tLen, double **specgram, double **aperiodicity, int fftl, double framePeriod, int fs,
  214. double *synthesisOut, int xLen)
  215. {
  216. int i,j;
  217. // double defaultF0 = 300.0; // うーん.
  218. double defaultF0 = DEFAULT_F0; // うーん.
  219. double *impulseResponse;
  220. impulseResponse = (double *)malloc(sizeof(double) * fftl);
  221. fftw_complex *cepstrum, *spectrum; // ケプストラムとスペクトル
  222. cepstrum = (fftw_complex *)malloc(sizeof(fftw_complex) * fftl);
  223. spectrum = (fftw_complex *)malloc(sizeof(fftw_complex) * fftl);
  224. double currentTime = 0.0;
  225. int currentPosition = 0;//currentTime / framePeriod;
  226. int currentFrame = 0;
  227. for(i = 0;;i++)
  228. {
  229. // 完全な無音は飛ばす.
  230. if(currentFrame < tLen && f0[currentFrame] < 0.0){
  231. currentFrame++;
  232. currentTime = (double)currentFrame * framePeriod / 1000.0;
  233. currentPosition = (int)(currentTime * (double)fs);
  234. if(currentFrame == tLen) break;
  235. continue;
  236. }
  237. for(j = 0;j < fftl;j++) impulseResponse[j] = 0.0; // 配列は毎回初期化
  238. getOneFrameSegment(f0, tLen, specgram, aperiodicity, fftl, framePeriod, currentTime, fs, defaultF0,
  239. spectrum, cepstrum, impulseResponse, xLen);
  240. currentPosition = (int)(currentTime*(double)fs);
  241. for(j = 0;j < fftl/2;j++)
  242. {
  243. if(j+currentPosition >= xLen) break;
  244. synthesisOut[j+currentPosition] += impulseResponse[j];
  245. }
  246. // 更新
  247. currentTime += 1.0/(f0[currentFrame] == 0.0 ? defaultF0 : f0[currentFrame]);
  248. currentFrame = (int)(currentTime/(framePeriod/1000.0) + 0.5);
  249. currentPosition = (int)(currentTime*(double)fs);
  250. if(j+currentPosition >= xLen || currentFrame >= tLen) break;
  251. }
  252. free(cepstrum); free(spectrum);
  253. free(impulseResponse);
  254. return;
  255. }