123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439 |
- /*
- Sekai - addons for the WORLD speech toolkit
- Copyright (C) 2016 Tobias Platen
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- #include "VVDReader.h"
- #include "mfcc.h"
- #include "midi.h"
- #include <assert.h>
- #include <math.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <world/cheaptrick.h>
- #include <world/common.h>
- #include <world/constantnumbers.h>
- #include <world/matlabfunctions.h>
- #include <world/synthesis.h>
- #include <sndfile.h>
- #include "common.h"
- #include "epr.h"
- #ifdef USE_EPR
- typedef struct {
- EprSourceParams src;
- EprResonance *res;
- int n_res;
- double *residual;
- } EpRFrame;
- void EpRFrameAllocate(EpRFrame *frame, int n_res, int fft_size) {
- frame->n_res = n_res;
- frame->res = new EprResonance[n_res];
- frame->residual = new double[fft_size / 2 + 1];
- }
- void EpREstimate(EpRFrame *frame, double *spectrogram, int fft_size, int fs,
- double f0) {
- EprSourceEstimate(spectrogram, fft_size, fs, f0, &frame->src);
- EprVocalTractEstimate(spectrogram, fft_size, fs, f0, &frame->src,
- frame->residual, frame->res, frame->n_res);
- }
- int EpRFindIndex(double f, EpRFrame *frame, int max) {
- double min = 22 * 1000;
- int index = -1;
- for (int i = 0; i < 20; i++) {
- double delta = fabs(f - frame->res[i].f);
- if (delta < min) {
- min = delta;
- index = i;
- }
- }
- if (min > max) index = -1;
- // printf("MIN: %f INDEX: %i\n",min,index);
- return index;
- }
- #endif
- #define MAXPOINTS 8
- typedef struct {
- float x[MAXPOINTS];
- float y[MAXPOINTS];
- float l;
- float r;
- int n_points;
- int vvd_index;
- } segment_t;
- #define X_END(p) p.x[p.n_points - 1]
- // move to common.h
- float segment_interp(float x, float a, float b, float factor) {
- assert(x >= a);
- assert(b >= x);
- float right = b - x;
- return factor * (x - a) / (b - a);
- }
- float interp_linear(float *x, float *y, int nx, float ref) {
- int i;
- for (i = 0; i < nx - 1; i++) {
- if (ref >= x[i] && ref <= x[i + 1]) {
- float x1 = x[i];
- float x2 = x[i + 1];
- float tmp = (ref - x1) / (x2 - x1);
- return y[i] * (1 - tmp) + y[i + 1] * tmp;
- }
- }
- fprintf(stderr, "INTERP_LINEAR: out of range\n");
- return NAN;
- }
- class MiraiSynth {
- float _mid;
- float _left;
- float _right;
- bool _concat;
- bool _lastConcat;
- int _currentIndex;
- double _currentPos;
- ////////////////////////////////////////////////////
- MinimumPhaseAnalysis _minimum_phase;
- ForwardRealFFT _forward_real_fft;
- InverseRealFFT _inverse_real_fft;
- float *_vvdData;
- int _fs;
- int _fftSize;
- double *_spectrogram;
- double *_aperiodicity;
- double *_rightSpectrogram;
- double *_rightAperiodicity;
- double *_periodicResponse;
- double *_aperiodicResponse;
- double *_response;
- double *_olaFrame;
- int _cepstrumLength;
- float _vvdPitch;
- VVDReader *_vvd;
- // input seg
- int _seg_count;
- segment_t _seg[100];
- // output buffer
- double _signal_length;
- int _signal_length_samples;
- float *_signal;
- #ifdef USE_EPR
- EpRFrame _currentEpR;
- EpRFrame _leftEpR;
- EpRFrame _rightEpR;
- #endif
- void uncompressVVDFrame(double *spectrogram, double *aperiodicity) {
- float *tmp = _vvdData;
- _vvdPitch = *tmp;
- float *cepstrum1 = tmp + 1;
- float *cepstrum2 = cepstrum1 + _cepstrumLength;
- // assert(frequencyFromNote(vvdPitch)<400);
- // printf("uncompress %f\n",frequencyFromNote(vvdPitch));
- MFCCDecompress(&spectrogram, 1, _fs, _fftSize, _cepstrumLength, &cepstrum1,
- false);
- MFCCDecompress(&aperiodicity, 1, _fs, _fftSize, _cepstrumLength, &cepstrum2,
- true);
- }
- void update_concat_zone(segment_t *s) {
- assert(X_END(s[0]) == s[1].x[0]);
- _mid = X_END(s[0]);
- assert(X_END(s[0]) - s[0].r > s[0].x[0]);
- _left = X_END(s[0]) - s[0].r;
- assert(s[1].x[0] + s[1].l < X_END(s[1]));
- _right = s[1].x[0] + s[1].l;
- }
- public:
- int init() {
- // init vvd_reader
- _vvd = new VVDReader();
- _vvd->addVVD("vvd/0.vvd");
- _vvd->addVVD("vvd/1.vvd");
- _vvd->addVVD("vvd/2.vvd");
- _vvdData = new float[_vvd->getFrameSize() / sizeof(float)];
- _cepstrumLength = _vvd->getCepstrumLength();
- _fs = _vvd->getSamplerate();
- CheapTrickOption option;
- InitializeCheapTrickOption(&option);
- _fftSize = GetFFTSizeForCheapTrick(_fs, &option);
- _spectrogram = new double[_fftSize / 2 + 1];
- _aperiodicity = new double[_fftSize / 2 + 1];
- _rightSpectrogram = new double[_fftSize / 2 + 1];
- _rightAperiodicity = new double[_fftSize / 2 + 1];
- InitializeInverseRealFFT(_fftSize, &_inverse_real_fft);
- InitializeForwardRealFFT(_fftSize, &_forward_real_fft);
- InitializeMinimumPhaseAnalysis(_fftSize, &_minimum_phase);
- _periodicResponse = new double[_fftSize];
- _aperiodicResponse = new double[_fftSize];
- _response = new double[_fftSize];
- #ifdef USE_EPR
- EpRFrameAllocate(&_currentEpR, 20, _fftSize);
- EpRFrameAllocate(&_leftEpR, 20, _fftSize);
- EpRFrameAllocate(&_rightEpR, 20, _fftSize);
- #endif
- }
- int prepareSynth() {
- // setup test segment list
- _seg_count = 3;
- _seg[0].x[0] = 0.0;
- _seg[0].y[0] = 0.2;
- _seg[0].x[1] = 1.3;
- _seg[0].y[1] = 0.8;
- _seg[0].n_points = 2;
- _seg[0].r = 0.2;
- _seg[0].vvd_index = 0;
- _seg[1].x[0] = 1.3;
- _seg[1].y[0] = 0.2;
- _seg[1].x[1] = 2.7;
- _seg[1].y[1] = 0.8;
- _seg[1].n_points = 2;
- _seg[1].l = 0.1;
- _seg[1].r = 0.2;
- _seg[1].vvd_index = 1;
- _seg[2].x[0] = 2.7;
- _seg[2].y[0] = 0.2;
- _seg[2].x[1] = 3.9;
- _seg[2].y[1] = 0.8;
- _seg[2].n_points = 2;
- _seg[2].l = 0.2;
- _seg[2].vvd_index = 2;
- _signal_length = X_END(_seg[_seg_count - 1]);
- _signal_length_samples = _fs * _signal_length + 2 * _fftSize;
- _signal = new float[_signal_length_samples];
- update_concat_zone(_seg);
- }
- int runSynth() {
- printf("currentPos %f\n", _currentPos);
- if (_currentIndex > _seg_count - 1) {
- printf("END OF FILE\n");
- return 0;
- }
- float f0 = 200;
- double y_pos = interp_linear(_seg[_currentIndex].x, _seg[_currentIndex].y,
- _seg[_currentIndex].n_points, _currentPos);
- int idx = _seg[_currentIndex].vvd_index;
- _vvd->selectVVD(idx);
- float framePeriod = _vvd->getFramePeriod();
- /////////
- float fractionalIndex = y_pos * 1000.0 / framePeriod;
- if (_vvd->getSegment(fractionalIndex, _vvdData) == false) {
- printf("PANIC: cannot read from VVD\n");
- exit(1);
- }
- uncompressVVDFrame(_spectrogram, _aperiodicity);
- if (_currentPos >= _left && _concat == false && _lastConcat == false) {
- printf("START CONCAT %f %f %f\n", _left, _mid, _right);
- double y_left =
- interp_linear(_seg[_currentIndex].x, _seg[_currentIndex].y,
- _seg[_currentIndex].n_points, _mid);
- double y_right =
- interp_linear(_seg[_currentIndex + 1].x, _seg[_currentIndex + 1].y,
- _seg[_currentIndex + 1].n_points, _mid);
- float fractionalIndex_left = y_left * 1000.0 / framePeriod;
- float fractionalIndex_right = y_right * 1000.0 / framePeriod;
- _vvd->selectVVD(_seg[_currentIndex].vvd_index);
- if (_vvd->getSegment(fractionalIndex_left, _vvdData) == false) {
- printf("PANIC: cannot read from VVD [L]\n");
- exit(1);
- }
- #ifdef USE_EPR
- uncompressVVDFrame(_TODOspectrogram, _TODOaperiodicity);
- EpREstimate(&_leftEpR, _spectrogram, _fftSize, _fs, f0);
- #endif
- _vvd->selectVVD(_seg[_currentIndex + 1].vvd_index);
- if (_vvd->getSegment(fractionalIndex_right, _vvdData) == false) {
- printf("PANIC: cannot read from VVD [R]\n");
- exit(1);
- }
- uncompressVVDFrame(_rightSpectrogram, _rightAperiodicity);
- #ifdef USE_EPR
- EpREstimate(&_rightEpR, _spectrogram, _fftSize, _fs, f0);
- #endif
- _concat = true;
- }
- #ifndef BETA
- if (_concat) {
- if (_currentPos < _mid) {
- double factor;
- factor = segment_interp(_currentPos, _left, _mid, 1.0);
- printf("%f\n", factor);
- for (int i = 0; i < _fftSize / 2 + 1; i++) {
- _spectrogram[i] =
- _spectrogram[i] * (1 - factor) + _rightSpectrogram[i] * factor;
- _aperiodicity[i] =
- _aperiodicity[i] * (1 - factor) + _rightAperiodicity[i] * factor;
- }
- }
- }
- #endif
- #ifdef USE_EPR
- if (_concat) {
- EpREstimate(&_currentEpR, _spectrogram, _fftSize, _fs, f0);
- double factor;
- char side;
- if (_currentPos < _mid) {
- side = 'L';
- factor = segment_interp(_currentPos, _left, _mid, -0.5);
- } else {
- side = 'R';
- factor = segment_interp(_currentPos, _right, _mid, +0.5);
- }
- #define CORR(X) \
- _currentEpR.src.X += factor * (_leftEpR.src.X - _rightEpR.src.X);
- CORR(slope)
- CORR(slopedepthdb)
- CORR(gaindb)
- // printf("C SRC: %f %f %f
- // [%c]\n",_currentEpR.src.gaindb,_currentEpR.src.slope,_currentEpR.src.slopedepthdb,side);
- #undef CORR
- for (int i = 0; i < 20; i++) {
- if (_currentEpR.res[i].f > 0) {
- int l = EpRFindIndex(_currentEpR.res[i].f, &_leftEpR, 75);
- int r = EpRFindIndex(_currentEpR.res[i].f, &_rightEpR, 75);
- double lfrq = 0;
- double rfrq = 0;
- if (l >= 0) lfrq = _leftEpR.res[l].f;
- if (r >= 0) rfrq = _rightEpR.res[r].f;
- if (l > -1 && r > -1) {
- // printf("EpR change: %f %f\n",_currentEpR.res[i].f,factor);
- #define CORR(X) \
- _currentEpR.res[i].X += factor * (_leftEpR.res[l].X - _rightEpR.res[r].X);
- CORR(gain_db);
- CORR(f);
- CORR(bw);
- #undef CORR
- // printf("RES[%i]: %f %f %f [L=%i(%f) R=%i(%f)]
- // CORR\n",i,_currentEpR.res[i].f,_currentEpR.res[i].bw,_currentEpR.res[i].gain_db,l,lfrq,r,rfrq);
- EprResonanceUpdate(&_currentEpR.res[i], _fs);
- }
- }
- }
- for (int i = 0; i < _fftSize / 2 + 1; i++) {
- double frq = i * 1.0 * _fs / _fftSize;
- double dB = EprAtFrequency(&_currentEpR.src, frq, _fs, _currentEpR.res,
- _currentEpR.n_res);
- // dB-=10;
- dB += _currentEpR.residual[i];
- _spectrogram[i] = exp(dB / TWENTY_OVER_LOG10);
- }
- }
- #endif
- if (_concat == true && _currentPos >= _right) {
- _concat = false;
- printf("END CONCAT %i\n", _currentIndex);
- if (_currentIndex < _seg_count - 1)
- update_concat_zone(&_seg[_currentIndex]);
- else
- _lastConcat = true;
- }
- int noise_size = ((float)_fs) / f0;
- double current_vuv = 1.0; // only vowels
- SynthesisOneImpulseResponse(
- _fftSize, noise_size, current_vuv, _spectrogram, _aperiodicity,
- &_forward_real_fft, &_inverse_real_fft, &_minimum_phase,
- _periodicResponse, _aperiodicResponse, _response);
- int samplePos = _currentPos * _fs;
- for (int i = 0; i < _fftSize; i++) {
- int index = i + samplePos;
- assert(index < _signal_length_samples);
- _signal[index] += _response[i] * 0.5;
- }
- // TODO: synthesis
- double period = 1.0 / f0;
- _currentPos += period; // period
- double currentEnd = X_END(_seg[_currentIndex]);
- if (_currentPos > currentEnd) {
- _currentIndex++;
- }
- printf("END step\n");
- return 1;
- //}
- // write signal to sndfile
- }
- void writeWavFile() {
- SF_INFO info = {0};
- info.samplerate = _fs;
- info.channels = 1;
- info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
- SNDFILE *sf = sf_open("test_synth.wav", SFM_WRITE, &info);
- sf_write_float(sf, _signal, _signal_length_samples);
- sf_close(sf);
- }
- };
- int main() {
- MiraiSynth synth;
- // alloc
- // synthN
- synth.init();
- synth.prepareSynth();
- while (synth.runSynth())
- ;
- synth.writeWavFile();
- }
|