test_synth.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. /*
  2. Sekai - addons for the WORLD speech toolkit
  3. Copyright (C) 2016 Tobias Platen
  4. This program is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  15. #include "VVDReader.h"
  16. #include "mfcc.h"
  17. #include "midi.h"
  18. #include <assert.h>
  19. #include <math.h>
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <world/cheaptrick.h>
  23. #include <world/common.h>
  24. #include <world/constantnumbers.h>
  25. #include <world/matlabfunctions.h>
  26. #include <world/synthesis.h>
  27. #include <sndfile.h>
  28. #include "common.h"
  29. #include "epr.h"
  30. #ifdef USE_EPR
  31. typedef struct {
  32. EprSourceParams src;
  33. EprResonance *res;
  34. int n_res;
  35. double *residual;
  36. } EpRFrame;
  37. void EpRFrameAllocate(EpRFrame *frame, int n_res, int fft_size) {
  38. frame->n_res = n_res;
  39. frame->res = new EprResonance[n_res];
  40. frame->residual = new double[fft_size / 2 + 1];
  41. }
  42. void EpREstimate(EpRFrame *frame, double *spectrogram, int fft_size, int fs,
  43. double f0) {
  44. EprSourceEstimate(spectrogram, fft_size, fs, f0, &frame->src);
  45. EprVocalTractEstimate(spectrogram, fft_size, fs, f0, &frame->src,
  46. frame->residual, frame->res, frame->n_res);
  47. }
  48. int EpRFindIndex(double f, EpRFrame *frame, int max) {
  49. double min = 22 * 1000;
  50. int index = -1;
  51. for (int i = 0; i < 20; i++) {
  52. double delta = fabs(f - frame->res[i].f);
  53. if (delta < min) {
  54. min = delta;
  55. index = i;
  56. }
  57. }
  58. if (min > max) index = -1;
  59. // printf("MIN: %f INDEX: %i\n",min,index);
  60. return index;
  61. }
  62. #endif
  63. #define MAXPOINTS 8
  64. typedef struct {
  65. float x[MAXPOINTS];
  66. float y[MAXPOINTS];
  67. float l;
  68. float r;
  69. int n_points;
  70. int vvd_index;
  71. } segment_t;
  72. #define X_END(p) p.x[p.n_points - 1]
  73. // move to common.h
  74. float segment_interp(float x, float a, float b, float factor) {
  75. assert(x >= a);
  76. assert(b >= x);
  77. float right = b - x;
  78. return factor * (x - a) / (b - a);
  79. }
  80. float interp_linear(float *x, float *y, int nx, float ref) {
  81. int i;
  82. for (i = 0; i < nx - 1; i++) {
  83. if (ref >= x[i] && ref <= x[i + 1]) {
  84. float x1 = x[i];
  85. float x2 = x[i + 1];
  86. float tmp = (ref - x1) / (x2 - x1);
  87. return y[i] * (1 - tmp) + y[i + 1] * tmp;
  88. }
  89. }
  90. fprintf(stderr, "INTERP_LINEAR: out of range\n");
  91. return NAN;
  92. }
  93. class MiraiSynth {
  94. float _mid;
  95. float _left;
  96. float _right;
  97. bool _concat;
  98. bool _lastConcat;
  99. int _currentIndex;
  100. double _currentPos;
  101. ////////////////////////////////////////////////////
  102. MinimumPhaseAnalysis _minimum_phase;
  103. ForwardRealFFT _forward_real_fft;
  104. InverseRealFFT _inverse_real_fft;
  105. float *_vvdData;
  106. int _fs;
  107. int _fftSize;
  108. double *_spectrogram;
  109. double *_aperiodicity;
  110. double *_rightSpectrogram;
  111. double *_rightAperiodicity;
  112. double *_periodicResponse;
  113. double *_aperiodicResponse;
  114. double *_response;
  115. double *_olaFrame;
  116. int _cepstrumLength;
  117. float _vvdPitch;
  118. VVDReader *_vvd;
  119. // input seg
  120. int _seg_count;
  121. segment_t _seg[100];
  122. // output buffer
  123. double _signal_length;
  124. int _signal_length_samples;
  125. float *_signal;
  126. #ifdef USE_EPR
  127. EpRFrame _currentEpR;
  128. EpRFrame _leftEpR;
  129. EpRFrame _rightEpR;
  130. #endif
  131. void uncompressVVDFrame(double *spectrogram, double *aperiodicity) {
  132. float *tmp = _vvdData;
  133. _vvdPitch = *tmp;
  134. float *cepstrum1 = tmp + 1;
  135. float *cepstrum2 = cepstrum1 + _cepstrumLength;
  136. // assert(frequencyFromNote(vvdPitch)<400);
  137. // printf("uncompress %f\n",frequencyFromNote(vvdPitch));
  138. MFCCDecompress(&spectrogram, 1, _fs, _fftSize, _cepstrumLength, &cepstrum1,
  139. false);
  140. MFCCDecompress(&aperiodicity, 1, _fs, _fftSize, _cepstrumLength, &cepstrum2,
  141. true);
  142. }
  143. void update_concat_zone(segment_t *s) {
  144. assert(X_END(s[0]) == s[1].x[0]);
  145. _mid = X_END(s[0]);
  146. assert(X_END(s[0]) - s[0].r > s[0].x[0]);
  147. _left = X_END(s[0]) - s[0].r;
  148. assert(s[1].x[0] + s[1].l < X_END(s[1]));
  149. _right = s[1].x[0] + s[1].l;
  150. }
  151. public:
  152. int init() {
  153. // init vvd_reader
  154. _vvd = new VVDReader();
  155. _vvd->addVVD("vvd/0.vvd");
  156. _vvd->addVVD("vvd/1.vvd");
  157. _vvd->addVVD("vvd/2.vvd");
  158. _vvdData = new float[_vvd->getFrameSize() / sizeof(float)];
  159. _cepstrumLength = _vvd->getCepstrumLength();
  160. _fs = _vvd->getSamplerate();
  161. CheapTrickOption option;
  162. InitializeCheapTrickOption(&option);
  163. _fftSize = GetFFTSizeForCheapTrick(_fs, &option);
  164. _spectrogram = new double[_fftSize / 2 + 1];
  165. _aperiodicity = new double[_fftSize / 2 + 1];
  166. _rightSpectrogram = new double[_fftSize / 2 + 1];
  167. _rightAperiodicity = new double[_fftSize / 2 + 1];
  168. InitializeInverseRealFFT(_fftSize, &_inverse_real_fft);
  169. InitializeForwardRealFFT(_fftSize, &_forward_real_fft);
  170. InitializeMinimumPhaseAnalysis(_fftSize, &_minimum_phase);
  171. _periodicResponse = new double[_fftSize];
  172. _aperiodicResponse = new double[_fftSize];
  173. _response = new double[_fftSize];
  174. #ifdef USE_EPR
  175. EpRFrameAllocate(&_currentEpR, 20, _fftSize);
  176. EpRFrameAllocate(&_leftEpR, 20, _fftSize);
  177. EpRFrameAllocate(&_rightEpR, 20, _fftSize);
  178. #endif
  179. }
  180. int prepareSynth() {
  181. // setup test segment list
  182. _seg_count = 3;
  183. _seg[0].x[0] = 0.0;
  184. _seg[0].y[0] = 0.2;
  185. _seg[0].x[1] = 1.3;
  186. _seg[0].y[1] = 0.8;
  187. _seg[0].n_points = 2;
  188. _seg[0].r = 0.2;
  189. _seg[0].vvd_index = 0;
  190. _seg[1].x[0] = 1.3;
  191. _seg[1].y[0] = 0.2;
  192. _seg[1].x[1] = 2.7;
  193. _seg[1].y[1] = 0.8;
  194. _seg[1].n_points = 2;
  195. _seg[1].l = 0.1;
  196. _seg[1].r = 0.2;
  197. _seg[1].vvd_index = 1;
  198. _seg[2].x[0] = 2.7;
  199. _seg[2].y[0] = 0.2;
  200. _seg[2].x[1] = 3.9;
  201. _seg[2].y[1] = 0.8;
  202. _seg[2].n_points = 2;
  203. _seg[2].l = 0.2;
  204. _seg[2].vvd_index = 2;
  205. _signal_length = X_END(_seg[_seg_count - 1]);
  206. _signal_length_samples = _fs * _signal_length + 2 * _fftSize;
  207. _signal = new float[_signal_length_samples];
  208. update_concat_zone(_seg);
  209. }
  210. int runSynth() {
  211. printf("currentPos %f\n", _currentPos);
  212. if (_currentIndex > _seg_count - 1) {
  213. printf("END OF FILE\n");
  214. return 0;
  215. }
  216. float f0 = 200;
  217. double y_pos = interp_linear(_seg[_currentIndex].x, _seg[_currentIndex].y,
  218. _seg[_currentIndex].n_points, _currentPos);
  219. int idx = _seg[_currentIndex].vvd_index;
  220. _vvd->selectVVD(idx);
  221. float framePeriod = _vvd->getFramePeriod();
  222. /////////
  223. float fractionalIndex = y_pos * 1000.0 / framePeriod;
  224. if (_vvd->getSegment(fractionalIndex, _vvdData) == false) {
  225. printf("PANIC: cannot read from VVD\n");
  226. exit(1);
  227. }
  228. uncompressVVDFrame(_spectrogram, _aperiodicity);
  229. if (_currentPos >= _left && _concat == false && _lastConcat == false) {
  230. printf("START CONCAT %f %f %f\n", _left, _mid, _right);
  231. double y_left =
  232. interp_linear(_seg[_currentIndex].x, _seg[_currentIndex].y,
  233. _seg[_currentIndex].n_points, _mid);
  234. double y_right =
  235. interp_linear(_seg[_currentIndex + 1].x, _seg[_currentIndex + 1].y,
  236. _seg[_currentIndex + 1].n_points, _mid);
  237. float fractionalIndex_left = y_left * 1000.0 / framePeriod;
  238. float fractionalIndex_right = y_right * 1000.0 / framePeriod;
  239. _vvd->selectVVD(_seg[_currentIndex].vvd_index);
  240. if (_vvd->getSegment(fractionalIndex_left, _vvdData) == false) {
  241. printf("PANIC: cannot read from VVD [L]\n");
  242. exit(1);
  243. }
  244. #ifdef USE_EPR
  245. uncompressVVDFrame(_TODOspectrogram, _TODOaperiodicity);
  246. EpREstimate(&_leftEpR, _spectrogram, _fftSize, _fs, f0);
  247. #endif
  248. _vvd->selectVVD(_seg[_currentIndex + 1].vvd_index);
  249. if (_vvd->getSegment(fractionalIndex_right, _vvdData) == false) {
  250. printf("PANIC: cannot read from VVD [R]\n");
  251. exit(1);
  252. }
  253. uncompressVVDFrame(_rightSpectrogram, _rightAperiodicity);
  254. #ifdef USE_EPR
  255. EpREstimate(&_rightEpR, _spectrogram, _fftSize, _fs, f0);
  256. #endif
  257. _concat = true;
  258. }
  259. #ifndef BETA
  260. if (_concat) {
  261. if (_currentPos < _mid) {
  262. double factor;
  263. factor = segment_interp(_currentPos, _left, _mid, 1.0);
  264. printf("%f\n", factor);
  265. for (int i = 0; i < _fftSize / 2 + 1; i++) {
  266. _spectrogram[i] =
  267. _spectrogram[i] * (1 - factor) + _rightSpectrogram[i] * factor;
  268. _aperiodicity[i] =
  269. _aperiodicity[i] * (1 - factor) + _rightAperiodicity[i] * factor;
  270. }
  271. }
  272. }
  273. #endif
  274. #ifdef USE_EPR
  275. if (_concat) {
  276. EpREstimate(&_currentEpR, _spectrogram, _fftSize, _fs, f0);
  277. double factor;
  278. char side;
  279. if (_currentPos < _mid) {
  280. side = 'L';
  281. factor = segment_interp(_currentPos, _left, _mid, -0.5);
  282. } else {
  283. side = 'R';
  284. factor = segment_interp(_currentPos, _right, _mid, +0.5);
  285. }
  286. #define CORR(X) \
  287. _currentEpR.src.X += factor * (_leftEpR.src.X - _rightEpR.src.X);
  288. CORR(slope)
  289. CORR(slopedepthdb)
  290. CORR(gaindb)
  291. // printf("C SRC: %f %f %f
  292. // [%c]\n",_currentEpR.src.gaindb,_currentEpR.src.slope,_currentEpR.src.slopedepthdb,side);
  293. #undef CORR
  294. for (int i = 0; i < 20; i++) {
  295. if (_currentEpR.res[i].f > 0) {
  296. int l = EpRFindIndex(_currentEpR.res[i].f, &_leftEpR, 75);
  297. int r = EpRFindIndex(_currentEpR.res[i].f, &_rightEpR, 75);
  298. double lfrq = 0;
  299. double rfrq = 0;
  300. if (l >= 0) lfrq = _leftEpR.res[l].f;
  301. if (r >= 0) rfrq = _rightEpR.res[r].f;
  302. if (l > -1 && r > -1) {
  303. // printf("EpR change: %f %f\n",_currentEpR.res[i].f,factor);
  304. #define CORR(X) \
  305. _currentEpR.res[i].X += factor * (_leftEpR.res[l].X - _rightEpR.res[r].X);
  306. CORR(gain_db);
  307. CORR(f);
  308. CORR(bw);
  309. #undef CORR
  310. // printf("RES[%i]: %f %f %f [L=%i(%f) R=%i(%f)]
  311. // CORR\n",i,_currentEpR.res[i].f,_currentEpR.res[i].bw,_currentEpR.res[i].gain_db,l,lfrq,r,rfrq);
  312. EprResonanceUpdate(&_currentEpR.res[i], _fs);
  313. }
  314. }
  315. }
  316. for (int i = 0; i < _fftSize / 2 + 1; i++) {
  317. double frq = i * 1.0 * _fs / _fftSize;
  318. double dB = EprAtFrequency(&_currentEpR.src, frq, _fs, _currentEpR.res,
  319. _currentEpR.n_res);
  320. // dB-=10;
  321. dB += _currentEpR.residual[i];
  322. _spectrogram[i] = exp(dB / TWENTY_OVER_LOG10);
  323. }
  324. }
  325. #endif
  326. if (_concat == true && _currentPos >= _right) {
  327. _concat = false;
  328. printf("END CONCAT %i\n", _currentIndex);
  329. if (_currentIndex < _seg_count - 1)
  330. update_concat_zone(&_seg[_currentIndex]);
  331. else
  332. _lastConcat = true;
  333. }
  334. int noise_size = ((float)_fs) / f0;
  335. double current_vuv = 1.0; // only vowels
  336. SynthesisOneImpulseResponse(
  337. _fftSize, noise_size, current_vuv, _spectrogram, _aperiodicity,
  338. &_forward_real_fft, &_inverse_real_fft, &_minimum_phase,
  339. _periodicResponse, _aperiodicResponse, _response);
  340. int samplePos = _currentPos * _fs;
  341. for (int i = 0; i < _fftSize; i++) {
  342. int index = i + samplePos;
  343. assert(index < _signal_length_samples);
  344. _signal[index] += _response[i] * 0.5;
  345. }
  346. // TODO: synthesis
  347. double period = 1.0 / f0;
  348. _currentPos += period; // period
  349. double currentEnd = X_END(_seg[_currentIndex]);
  350. if (_currentPos > currentEnd) {
  351. _currentIndex++;
  352. }
  353. printf("END step\n");
  354. return 1;
  355. //}
  356. // write signal to sndfile
  357. }
  358. void writeWavFile() {
  359. SF_INFO info = {0};
  360. info.samplerate = _fs;
  361. info.channels = 1;
  362. info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
  363. SNDFILE *sf = sf_open("test_synth.wav", SFM_WRITE, &info);
  364. sf_write_float(sf, _signal, _signal_length_samples);
  365. sf_close(sf);
  366. }
  367. };
  368. int main() {
  369. MiraiSynth synth;
  370. // alloc
  371. // synthN
  372. synth.init();
  373. synth.prepareSynth();
  374. while (synth.runSynth())
  375. ;
  376. synth.writeWavFile();
  377. }