123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488 |
- //-----------------------------------------------------------------------------
- // Copyright 2012 Masanori Morise
- // Author: mmorise [at] yamanashi.ac.jp (Masanori Morise)
- // Last update: 2018/12/13
- //
- // Voice synthesis based on f0, spectrogram and aperiodicity.
- // forward_real_fft, inverse_real_fft and minimum_phase are used to speed up.
- //-----------------------------------------------------------------------------
- #include "world/synthesis.h"
- #include <math.h>
- #include "world/common.h"
- #include "world/constantnumbers.h"
- #include "world/matlabfunctions.h"
- #include <assert.h>
- namespace {
- static void GetNoiseSpectrum(int noise_size, int fft_size,
- const ForwardRealFFT *forward_real_fft) {
- double average = 0.0;
- for (int i = 0; i < noise_size; ++i) {
- forward_real_fft->waveform[i] = randn();
- average += forward_real_fft->waveform[i];
- }
- average /= noise_size;
- for (int i = 0; i < noise_size; ++i)
- forward_real_fft->waveform[i] -= average;
- for (int i = noise_size; i < fft_size; ++i)
- forward_real_fft->waveform[i] = 0.0;
- fft_execute(forward_real_fft->forward_fft);
- }
- //-----------------------------------------------------------------------------
- // GetAperiodicResponse() calculates an aperiodic response.
- //-----------------------------------------------------------------------------
- static void GetAperiodicResponse(int noise_size, int fft_size,
- const double *spectrum, const double *aperiodic_ratio, double current_vuv,
- const ForwardRealFFT *forward_real_fft,
- const InverseRealFFT *inverse_real_fft,
- const MinimumPhaseAnalysis *minimum_phase, double *aperiodic_response) {
- GetNoiseSpectrum(noise_size, fft_size, forward_real_fft);
- if (current_vuv != 0.0)
- for (int i = 0; i <= minimum_phase->fft_size / 2; ++i)
- minimum_phase->log_spectrum[i] =
- log(spectrum[i] * aperiodic_ratio[i]) / 2.0;
- else
- for (int i = 0; i <= minimum_phase->fft_size / 2; ++i)
- minimum_phase->log_spectrum[i] = log(spectrum[i]) / 2.0;
- GetMinimumPhaseSpectrum(minimum_phase);
- for (int i = 0; i <= fft_size / 2; ++i) {
- inverse_real_fft->spectrum[i][0] =
- minimum_phase->minimum_phase_spectrum[i][0] *
- forward_real_fft->spectrum[i][0] -
- minimum_phase->minimum_phase_spectrum[i][1] *
- forward_real_fft->spectrum[i][1];
- inverse_real_fft->spectrum[i][1] =
- minimum_phase->minimum_phase_spectrum[i][0] *
- forward_real_fft->spectrum[i][1] +
- minimum_phase->minimum_phase_spectrum[i][1] *
- forward_real_fft->spectrum[i][0];
- }
- fft_execute(inverse_real_fft->inverse_fft);
- fftshift(inverse_real_fft->waveform, fft_size, aperiodic_response);
- }
- //-----------------------------------------------------------------------------
- // RemoveDCComponent()
- //-----------------------------------------------------------------------------
- static void RemoveDCComponent(const double *periodic_response, int fft_size,
- const double *dc_remover, double *new_periodic_response) {
- double dc_component = 0.0;
- for (int i = fft_size / 2; i < fft_size; ++i)
- dc_component += periodic_response[i];
- for (int i = 0; i < fft_size / 2; ++i)
- new_periodic_response[i] = -dc_component * dc_remover[i];
- for (int i = fft_size / 2; i < fft_size; ++i)
- new_periodic_response[i] -= dc_component * dc_remover[i];
- }
- //-----------------------------------------------------------------------------
- // GetSpectrumWithFractionalTimeShift() calculates a periodic spectrum with
- // the fractional time shift under 1/fs.
- //-----------------------------------------------------------------------------
- static void GetSpectrumWithFractionalTimeShift(int fft_size,
- double coefficient, const InverseRealFFT *inverse_real_fft) {
- double re, im, re2, im2;
- for (int i = 0; i <= fft_size / 2; ++i) {
- re = inverse_real_fft->spectrum[i][0];
- im = inverse_real_fft->spectrum[i][1];
- re2 = cos(coefficient * i);
- im2 = sqrt(1.0 - re2 * re2); // sin(pshift)
- inverse_real_fft->spectrum[i][0] = re * re2 + im * im2;
- inverse_real_fft->spectrum[i][1] = im * re2 - re * im2;
- }
- }
- //-----------------------------------------------------------------------------
- // GetPeriodicResponse() calculates a periodic response.
- //-----------------------------------------------------------------------------
- static void GetPeriodicResponse(int fft_size, const double *spectrum,
- const double *aperiodic_ratio, double current_vuv,
- const InverseRealFFT *inverse_real_fft,
- const MinimumPhaseAnalysis *minimum_phase, const double *dc_remover,
- double fractional_time_shift, int fs, double *periodic_response) {
- if (current_vuv <= 0.5 || aperiodic_ratio[0] > 0.999) {
- for (int i = 0; i < fft_size; ++i) periodic_response[i] = 0.0;
- return;
- }
- for (int i = 0; i <= minimum_phase->fft_size / 2; ++i)
- minimum_phase->log_spectrum[i] =
- log(spectrum[i] * (1.0 - aperiodic_ratio[i]) +
- world::kMySafeGuardMinimum) / 2.0;
- GetMinimumPhaseSpectrum(minimum_phase);
- for (int i = 0; i <= fft_size / 2; ++i) {
- inverse_real_fft->spectrum[i][0] =
- minimum_phase->minimum_phase_spectrum[i][0];
- inverse_real_fft->spectrum[i][1] =
- minimum_phase->minimum_phase_spectrum[i][1];
- }
- // apply fractional time delay of fractional_time_shift seconds
- // using linear phase shift
- double coefficient =
- 2.0 * world::kPi * fractional_time_shift * fs / fft_size;
- GetSpectrumWithFractionalTimeShift(fft_size, coefficient, inverse_real_fft);
- fft_execute(inverse_real_fft->inverse_fft);
- fftshift(inverse_real_fft->waveform, fft_size, periodic_response);
- RemoveDCComponent(periodic_response, fft_size, dc_remover,
- periodic_response);
- }
- static void GetSpectralEnvelope(double current_time, double frame_period,
- int f0_length, const double * const *spectrogram, int fft_size,
- double *spectral_envelope) {
- int current_frame_floor = MyMinInt(f0_length - 1,
- static_cast<int>(floor(current_time / frame_period)));
- int current_frame_ceil = MyMinInt(f0_length - 1,
- static_cast<int>(ceil(current_time / frame_period)));
- double interpolation = current_time / frame_period - current_frame_floor;
- if (current_frame_floor == current_frame_ceil)
- for (int i = 0; i <= fft_size / 2; ++i)
- spectral_envelope[i] = fabs(spectrogram[current_frame_floor][i]);
- else
- for (int i = 0; i <= fft_size / 2; ++i)
- spectral_envelope[i] =
- (1.0 - interpolation) * fabs(spectrogram[current_frame_floor][i]) +
- interpolation * fabs(spectrogram[current_frame_ceil][i]);
- }
- static void GetAperiodicRatio(double current_time, double frame_period,
- int f0_length, const double * const *aperiodicity, int fft_size,
- double *aperiodic_spectrum) {
- int current_frame_floor = MyMinInt(f0_length - 1,
- static_cast<int>(floor(current_time / frame_period)));
- int current_frame_ceil = MyMinInt(f0_length - 1,
- static_cast<int>(ceil(current_time / frame_period)));
- double interpolation = current_time / frame_period - current_frame_floor;
- if (current_frame_floor == current_frame_ceil)
- for (int i = 0; i <= fft_size / 2; ++i)
- aperiodic_spectrum[i] =
- pow(GetSafeAperiodicity(aperiodicity[current_frame_floor][i]), 2.0);
- else
- for (int i = 0; i <= fft_size / 2; ++i)
- aperiodic_spectrum[i] = pow((1.0 - interpolation) *
- GetSafeAperiodicity(aperiodicity[current_frame_floor][i]) +
- interpolation *
- GetSafeAperiodicity(aperiodicity[current_frame_ceil][i]), 2.0);
- }
- //-----------------------------------------------------------------------------
- // GetOneFrameSegment() calculates a periodic and aperiodic response at a time.
- //-----------------------------------------------------------------------------
- static void GetOneFrameSegment(double current_vuv, int noise_size,
- const double * const *spectrogram, int fft_size,
- const double * const *aperiodicity, int f0_length, double frame_period,
- double current_time, double fractional_time_shift, int fs,
- const ForwardRealFFT *forward_real_fft,
- const InverseRealFFT *inverse_real_fft,
- const MinimumPhaseAnalysis *minimum_phase, const double *dc_remover,
- double *response) {
- double *aperiodic_response = new double[fft_size];
- double *periodic_response = new double[fft_size];
- double *spectral_envelope = new double[fft_size];
- double *aperiodic_ratio = new double[fft_size];
- GetSpectralEnvelope(current_time, frame_period, f0_length, spectrogram,
- fft_size, spectral_envelope);
- GetAperiodicRatio(current_time, frame_period, f0_length, aperiodicity,
- fft_size, aperiodic_ratio);
- // Synthesis of the periodic response
- GetPeriodicResponse(fft_size, spectral_envelope, aperiodic_ratio,
- current_vuv, inverse_real_fft, minimum_phase, dc_remover,
- fractional_time_shift, fs, periodic_response);
- // Synthesis of the aperiodic response
- GetAperiodicResponse(noise_size, fft_size, spectral_envelope,
- aperiodic_ratio, current_vuv, forward_real_fft,
- inverse_real_fft, minimum_phase, aperiodic_response);
- double sqrt_noise_size = sqrt(static_cast<double>(noise_size));
- for (int i = 0; i < fft_size; ++i)
- response[i] =
- (periodic_response[i] * sqrt_noise_size + aperiodic_response[i]) /
- fft_size;
- delete[] spectral_envelope;
- delete[] aperiodic_ratio;
- delete[] periodic_response;
- delete[] aperiodic_response;
- }
- static void GetOneFrameSegmentMBR(int index, const double* f0,
- const double * const *spectrogram, int fft_size,
- const double * const *aperiodicity, int f0_length, double frame_period,
- //double current_time, double fractional_time_shift, int fs,
- int fs,
- const ForwardRealFFT *forward_real_fft,
- const InverseRealFFT *inverse_real_fft,
- const MinimumPhaseAnalysis *minimum_phase, const double *dc_remover,
- double *response)
- {
- auto spectral_envelope = spectrogram[index];
- auto aperiodic_ratio = aperiodicity[index];
-
- double *aperiodic_response = new double[fft_size];
- double *periodic_response = new double[fft_size];
-
- double current_f0=f0[index];
- double fractional_time_shift=0;
-
- int noise_size=fs/500;
-
- if(current_f0>0)
- noise_size = fs/current_f0;
-
- if(noise_size>fft_size/2) noise_size=fft_size/2;
- //printf("GetOneFrameSegmentMBR noise_size=%i current_f0=%f\n",noise_size,current_f0);
-
- if(noise_size==0) return;
-
- // Synthesis of the periodic response
- GetPeriodicResponse(fft_size, spectral_envelope, aperiodic_ratio,
- current_f0, inverse_real_fft, minimum_phase, dc_remover,
- fractional_time_shift, fs, periodic_response);
- // Synthesis of the aperiodic response
- GetAperiodicResponse(noise_size, fft_size, spectral_envelope,
- aperiodic_ratio, current_f0, forward_real_fft,
- inverse_real_fft, minimum_phase, aperiodic_response);
- double sqrt_noise_size = sqrt(static_cast<double>(noise_size));
- for (int i = 0; i < fft_size; ++i)
- response[i] =
- (periodic_response[i] * sqrt_noise_size + aperiodic_response[i]) /
- fft_size;
-
- //response[0]=1.0;
-
- delete[] periodic_response;
- delete[] aperiodic_response;
- }
- static void GetTemporalParametersForTimeBase(const double *f0, int f0_length,
- int fs, int y_length, double frame_period, double lowest_f0,
- double *time_axis, double *coarse_time_axis, double *coarse_f0,
- double *coarse_vuv) {
- for (int i = 0; i < y_length; ++i)
- time_axis[i] = i / static_cast<double>(fs);
- // the array 'coarse_time_axis' is supposed to have 'f0_length + 1' positions
- for (int i = 0; i < f0_length; ++i) {
- coarse_time_axis[i] = i * frame_period;
- coarse_f0[i] = f0[i] < lowest_f0 ? 0.0 : f0[i];
- coarse_vuv[i] = coarse_f0[i] == 0.0 ? 0.0 : 1.0;
- }
- coarse_time_axis[f0_length] = f0_length * frame_period;
- coarse_f0[f0_length] = coarse_f0[f0_length - 1] * 2 -
- coarse_f0[f0_length - 2];
- coarse_vuv[f0_length] = coarse_vuv[f0_length - 1] * 2 -
- coarse_vuv[f0_length - 2];
- }
- static int GetPulseLocationsForTimeBase(const double *interpolated_f0,
- const double *time_axis, int y_length, int fs, double *pulse_locations,
- int *pulse_locations_index, double *pulse_locations_time_shift) {
- double *total_phase = new double[y_length];
- double *wrap_phase = new double[y_length];
- double *wrap_phase_abs = new double[y_length - 1];
- total_phase[0] = 2.0 * world::kPi * interpolated_f0[0] / fs;
- wrap_phase[0] = fmod(total_phase[0], 2.0 * world::kPi);
- for (int i = 1; i < y_length; ++i) {
- total_phase[i] = total_phase[i - 1] +
- 2.0 * world::kPi * interpolated_f0[i] / fs;
- wrap_phase[i] = fmod(total_phase[i], 2.0 * world::kPi);
- wrap_phase_abs[i - 1] = fabs(wrap_phase[i] - wrap_phase[i - 1]);
- }
- int number_of_pulses = 0;
- for (int i = 0; i < y_length - 1; ++i) {
- if (wrap_phase_abs[i] > world::kPi) {
- pulse_locations[number_of_pulses] = time_axis[i];
- pulse_locations_index[number_of_pulses] = i;
- // calculate the time shift in seconds between exact fractional pulse
- // position and the integer pulse position (sample i)
- // as we don't have access to the exact pulse position, we infer it
- // from the point between sample i and sample i + 1 where the
- // accummulated phase cross a multiple of 2pi
- // this point is found by solving y1 + x * (y2 - y1) = 0 for x, where y1
- // and y2 are the phases corresponding to sample i and i + 1, offset so
- // they cross zero; x >= 0
- double y1 = wrap_phase[i] - 2.0 * world::kPi;
- double y2 = wrap_phase[i + 1];
- double x = -y1 / (y2 - y1);
- pulse_locations_time_shift[number_of_pulses] = x / fs;
- ++number_of_pulses;
- }
- }
- delete[] wrap_phase_abs;
- delete[] wrap_phase;
- delete[] total_phase;
- return number_of_pulses;
- }
- static int GetTimeBase(const double *f0, int f0_length, int fs,
- double frame_period, int y_length, double lowest_f0,
- double *pulse_locations, int *pulse_locations_index,
- double *pulse_locations_time_shift, double *interpolated_vuv) {
- double *time_axis = new double[y_length];
- double *coarse_time_axis = new double[f0_length + 1];
- double *coarse_f0 = new double[f0_length + 1];
- double *coarse_vuv = new double[f0_length + 1];
- GetTemporalParametersForTimeBase(f0, f0_length, fs, y_length, frame_period,
- lowest_f0, time_axis, coarse_time_axis, coarse_f0, coarse_vuv);
- double *interpolated_f0 = new double[y_length];
- interp1(coarse_time_axis, coarse_f0, f0_length + 1,
- time_axis, y_length, interpolated_f0);
- interp1(coarse_time_axis, coarse_vuv, f0_length + 1,
- time_axis, y_length, interpolated_vuv);
- for (int i = 0; i < y_length; ++i) {
- interpolated_vuv[i] = interpolated_vuv[i] > 0.5 ? 1.0 : 0.0;
- interpolated_f0[i] =
- interpolated_vuv[i] == 0.0 ? world::kDefaultF0 : interpolated_f0[i];
- }
- int number_of_pulses = GetPulseLocationsForTimeBase(interpolated_f0,
- time_axis, y_length, fs, pulse_locations, pulse_locations_index,
- pulse_locations_time_shift);
- delete[] coarse_vuv;
- delete[] coarse_f0;
- delete[] coarse_time_axis;
- delete[] time_axis;
- delete[] interpolated_f0;
- return number_of_pulses;
- }
- static void GetDCRemover(int fft_size, double *dc_remover) {
- double dc_component = 0.0;
- for (int i = 0; i < fft_size / 2; ++i) {
- dc_remover[i] = 0.5 -
- 0.5 * cos(2.0 * world::kPi * (i + 1.0) / (1.0 + fft_size));
- dc_remover[fft_size - i - 1] = dc_remover[i];
- dc_component += dc_remover[i] * 2.0;
- }
- for (int i = 0; i < fft_size / 2; ++i) {
- dc_remover[i] /= dc_component;
- dc_remover[fft_size - i - 1] = dc_remover[i];
- }
- }
- } // namespace
- void Synthesis(const double *f0, int f0_length,
- const double * const *spectrogram, const double * const *aperiodicity,
- int fft_size, double frame_period, int fs, int y_length, double *y) {
- randn_reseed();
- double *impulse_response = new double[fft_size];
- for (int i = 0; i < y_length; ++i) y[i] = 0.0;
- MinimumPhaseAnalysis minimum_phase = {0};
- InitializeMinimumPhaseAnalysis(fft_size, &minimum_phase);
- InverseRealFFT inverse_real_fft = {0};
- InitializeInverseRealFFT(fft_size, &inverse_real_fft);
- ForwardRealFFT forward_real_fft = {0};
- InitializeForwardRealFFT(fft_size, &forward_real_fft);
- double *pulse_locations = new double[y_length];
- int *pulse_locations_index = new int[y_length];
- double *pulse_locations_time_shift = new double[y_length];
- double *interpolated_vuv = new double[y_length];
- int number_of_pulses = GetTimeBase(f0, f0_length, fs, frame_period / 1000.0,
- y_length, fs / fft_size + 1.0, pulse_locations, pulse_locations_index,
- pulse_locations_time_shift, interpolated_vuv);
- double *dc_remover = new double[fft_size];
- GetDCRemover(fft_size, dc_remover);
- frame_period /= 1000.0;
- int noise_size;
- int index, offset, lower_limit, upper_limit;
- for (int i = 0; i < number_of_pulses; ++i) {
- noise_size = pulse_locations_index[MyMinInt(number_of_pulses - 1, i + 1)] -
- pulse_locations_index[i];
- GetOneFrameSegment(interpolated_vuv[pulse_locations_index[i]], noise_size,
- spectrogram, fft_size, aperiodicity, f0_length, frame_period,
- pulse_locations[i], pulse_locations_time_shift[i], fs,
- &forward_real_fft, &inverse_real_fft, &minimum_phase, dc_remover,
- impulse_response);
- offset = pulse_locations_index[i] - fft_size / 2 + 1;
- lower_limit = MyMaxInt(0, -offset);
- upper_limit = MyMinInt(fft_size, y_length - offset);
- for (int j = lower_limit; j < upper_limit; ++j) {
- index = j + offset;
- y[index] += impulse_response[j];
- }
- }
- delete[] dc_remover;
- delete[] pulse_locations;
- delete[] pulse_locations_index;
- delete[] pulse_locations_time_shift;
- delete[] interpolated_vuv;
- DestroyMinimumPhaseAnalysis(&minimum_phase);
- DestroyInverseRealFFT(&inverse_real_fft);
- DestroyForwardRealFFT(&forward_real_fft);
- delete[] impulse_response;
- }
- void SynthesisMBR(const double *f0, int f0_length,
- const double * const *spectrogram, const double * const *aperiodicity,
- int fft_size, double frame_period, int fs ,int y_length, double *y) {
- randn_reseed();
- for (int i = 0; i < y_length; ++i) y[i] = 0.0;
- MinimumPhaseAnalysis minimum_phase = {0};
- InitializeMinimumPhaseAnalysis(fft_size, &minimum_phase);
- InverseRealFFT inverse_real_fft = {0};
- InitializeInverseRealFFT(fft_size, &inverse_real_fft);
- ForwardRealFFT forward_real_fft = {0};
- InitializeForwardRealFFT(fft_size, &forward_real_fft);
- double *dc_remover = new double[fft_size];
- GetDCRemover(fft_size, dc_remover);
-
- assert(y_length == fft_size*f0_length);
-
- for(int i=0;i<f0_length;i++)
- {
- GetOneFrameSegmentMBR(i,f0,
- //interpolated_vuv[pulse_locations_index[i]], noise_size,
- spectrogram, fft_size, aperiodicity, f0_length, frame_period,
- //pulse_locations[i], pulse_locations_time_shift[i], fs,
- fs,
- &forward_real_fft, &inverse_real_fft, &minimum_phase, dc_remover,
- &y[fft_size*i]);
- }
- delete[] dc_remover;
- DestroyMinimumPhaseAnalysis(&minimum_phase);
- DestroyInverseRealFFT(&inverse_real_fft);
- DestroyForwardRealFFT(&forward_real_fft);
- }
|