123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225 |
- //-----------------------------------------------------------------------------
- // Copyright 2017 Masanori Morise
- // Author: mmorise [at] yamanashi.ac.jp (Masanori Morise)
- // Last update: 2017/04/01
- //
- // Summary:
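// This example analyzes an audio file with CheapTrick (spectral envelope) and
// D4C (aperiodicity) using a precomputed F0 track, resynthesizes it with
// SynthesisMBR, and saves the resulting audio to a file.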
- //
- // How to use:
- // % spanalysis -h
- //
- // Related works: f0analysis.cpp, apanalysis.cpp, readandsynthesis.cpp
- //-----------------------------------------------------------------------------
#include <sndfile.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <string>
#include <vector>

#include "world/cheaptrick.h"
#include "world/codec.h"
#include "world/constantnumbers.h"
#include "world/d4c.h"
#include "world/synthesis.h"
#include "sekai/Track.h"
- namespace {
//-----------------------------------------------------------------------------
// Display how to use this program.
//
// argv: program name substituted into the title and usage lines
//       (callers pass argv[0]).
//
// The defaults shown in brackets mirror the values main() sets before the
// command line is parsed; keep both in sync when changing either.
//-----------------------------------------------------------------------------
void usage(char *argv) {
  printf("\n");
  printf(" %s - Wide-Band Resynthesis OverLap Add\n", argv);
  printf("\n");
  printf(" usage:\n");
  printf(" %s input.wav input.f0 [options]\n", argv);
  printf(" options:\n");
  printf(" -f f : FFT size (samples) [variable]\n");
  printf(" : Default depends on fs (44100 -> 2048, 16000 -> 1024)\n");
  printf(" -q q : compensation coefficient [-0.15]\n");
  printf(" : I don't recommend to change this value.\n");
  printf(" -d d : number of coefficients [0 (without coding)]\n");
  printf(" : Spectral envelope is decoded by these coefficients.\n");
  printf(" : You must not set this value above the half of\n");
  printf(" : the FFT size.\n");
  printf(" -t t : threshold used in D4C Lovetrain [0.85]\n");
  printf(" -m m : mbr_period [fft_size]\n");
  // main() initializes the output name to "output.wav", not "output.sp".
  printf(" -o name : filename used for output [output.wav]\n");
  printf("\n");
}
- //-----------------------------------------------------------------------------
- // Set parameters from command line options
- //-----------------------------------------------------------------------------
- int SetOption(int argc, char **argv, int *fft_size, double *q1,double *threshold,int* mbr_period,
- int *number_of_dimensions, char *filename) {
- while (--argc) {
- if (strcmp(argv[argc], "-f") == 0) *fft_size = atoi(argv[argc + 1]);
- if (strcmp(argv[argc], "-q") == 0) *q1 = atof(argv[argc + 1]);
- if (strcmp(argv[argc], "-t") == 0) *threshold = atof(argv[argc + 1]);
- if (strcmp(argv[argc], "-m") == 0) *mbr_period = atoi(argv[argc + 1]);
- if (strcmp(argv[argc], "-d") == 0)
- *number_of_dimensions = atof(argv[argc + 1]);
- if (strcmp(argv[argc], "-o") == 0)
- snprintf(filename, 200, "%s", argv[argc + 1]);
- if (strcmp(argv[argc], "-h") == 0) {
- usage(argv[0]);
- return 0;
- }
- }
- return 1;
- }
- bool sndfile_write(double* samples,int length,int samplerate,char* fileName)
- {
- SF_INFO info;
- memset(&info,0,sizeof(info));
- std::string fn = fileName;
- if(fn.substr(fn.find_last_of(".") + 1) == "ogg")
- {
- info.format = SF_FORMAT_OGG | SF_FORMAT_VORBIS;
- }
- else
- {
- info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
- }
- info.samplerate = samplerate;
- info.channels = 1;
- SNDFILE* sf = sf_open(fileName,SFM_WRITE,&info);
- int count = sf_write_double(sf,samples,length);
- sf_close(sf);
- return count==length;
- }
- void sndfile_write_compressed(double* y,int y_length,int x_length,int fs,char* fileName,int mbr_period,double frame_period,int f0_length,int fft_size)
- {
- double* y2 = new double[y_length];
- int n_frames = x_length/mbr_period+1;
- for(int i=0;i<n_frames;i++)
- {
- float output_pos = i*1.0*mbr_period/fs;
- float input_pos = output_pos*1000/frame_period;
-
- int index0 = (int)input_pos;
- int index1 = index0+1;
- float frac = input_pos-index0;
-
- if(index0<f0_length-2)
- {
- float frac2 = 1-frac;
- //printf("frac %f frac2 %f\n",frac,frac2);
- double* tmp = &y2[i*mbr_period];
- double* left = &y[index0*fft_size];
- double* right = &y[index1*fft_size];
- for(int j=0;j<fft_size;j++)
- {
- tmp[j] += frac2*left[j] + frac*right[j];
- }
- }
- }
- int shift = fft_size/2;
- sndfile_write(y2+shift, x_length, fs, fileName);
- }
- } // namespace
- //-----------------------------------------------------------------------------
- // This example estimates the spectral envelope from an audio file
- // and then saves the result to a file.
- //-----------------------------------------------------------------------------
- int main(int argc, char **argv) {
- // Command check
- if (argc < 2 || 0 == strcmp(argv[1], "-h")) {
- usage(argv[0]);
- return -1;
- }
-
- // Read F0 information : TODO use Track class
- Track track;
- track.readFromFile(argv[2]);
- int f0_length = track.getPitchCount();
- double frame_period = 1000*atof(track.getHeaderInfo("FrameShift").c_str());
-
- double *f0 = new double[f0_length];
- double *temporal_positions = new double[f0_length];
- for(int i=0;i<f0_length;i++)
- {
- Pitch p = track.getPitch(i);
- temporal_positions[i] = p.pos;
- f0[i] = p.f0;
- }
- SF_INFO info;
- memset(&info, 0, sizeof(info));
- SNDFILE *infile = sf_open(argv[1], SFM_READ, &info);
- if (infile == 0) {
- printf("errror: cannot open wav file\n");
- }
- int fs, x_length;
- fs = info.samplerate;
- x_length = info.frames;
- if (info.channels != 1) {
- printf("error: wavfile must be mono\n");
- }
- double *x = new double[x_length];
- sf_read_double(infile, x, x_length);
- sf_close(infile);
- // Default parameters
- CheapTrickOption option = { 0 };
- InitializeCheapTrickOption(fs, &option);
- char filename[200] = "output.wav";
- int number_of_dimensions = 0;
-
- D4COption option2 = { 0 };
- InitializeD4COption(&option2);
- option2.threshold = 0.85;
- int mbr_period = option.fft_size;
- // Options from command line
- if (SetOption(argc, argv, &option.fft_size, &option.q1,&option2.threshold,&mbr_period,
- &number_of_dimensions, filename) == 0) return -1;
- // Spectral envelope analysis
- double **spectrogram = new double *[f0_length];
- for (int i = 0; i < f0_length; ++i)
- spectrogram[i] = new double[option.fft_size / 2 + 1];
- CheapTrick(x, x_length, fs, temporal_positions, f0, f0_length, &option,
- spectrogram);
-
- // Aperiodicity analysis
- double **aperiodicity = new double *[f0_length];
- for (int i = 0; i < f0_length; ++i)
- aperiodicity[i] = new double[option.fft_size / 2 + 1];
- D4C(x, x_length, fs, temporal_positions, f0, f0_length, option.fft_size,
- &option2, aperiodicity);
-
- int y_length =f0_length*option.fft_size;
- double* y = new double[y_length];
- // Resynthesis
- SynthesisMBR(f0, f0_length,
- spectrogram, aperiodicity,
- option.fft_size, frame_period, fs,
- y_length, y);
-
- //write wav file
-
- if(mbr_period==option.fft_size)
- sndfile_write(y,y_length,fs,filename);
- else
- sndfile_write_compressed(y,y_length,x_length,fs,filename,mbr_period,frame_period,f0_length,option.fft_size);
- // Memory deallocation
- for (int i = 0; i < f0_length; ++i) delete[] spectrogram[i];
- delete[] spectrogram;
- delete[] f0;
- delete[] temporal_positions;
- delete[] x;
- return 0;
- }
|