123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284 |
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <string>
#include <vector>

#include <boost/lexical_cast.hpp>
#include <espeak-ng/espeak_ng.h>
#include <sndfile.h>

#define PLAYBACK_MODE (ENOUTPUT_MODE_SYNCHRONOUS | ENOUTPUT_MODE_SPEAK_AUDIO)
// One contiguous run of samples that were all reported through the same
// output hook. Built by segment_event() and stored in output_data::segments.
struct segment
{
    char type;   // tag given to segment_event(): 'S', 'U' or 'V' in this file
    int start;   // index into output_data::samples where the run begins
    int length;  // number of samples in the run
};
// A phoneme marker recorded by outputPhoSymbol(), stamped with the number of
// samples that had been collected when it arrived.
struct pho_event
{
    std::string code;  // phoneme code text as handed to the hook
    int type;          // phoneme type value as handed to the hook
    int start;         // sample index at which the marker was recorded
};
- struct output_data
- {
- // for each run
- std::vector<short int> samples;
- std::vector<segment> segments;
- std::vector<pho_event> pho;
- char segment_type = 0;
- int segment_pos = 0;
- float length;
- };
- struct output_data* current;
- std::vector<output_data*> runs;
- void init()
- {
- espeak_ng_InitializePath(NULL);
-
- espeak_ng_ERROR_CONTEXT context = NULL;
- espeak_ng_Initialize(&context);
-
- espeak_ng_InitializeOutput(ENOUTPUT_MODE_SYNCHRONOUS, 0, NULL); //FIX https://notabug.org/isengaara/sekai/issues/4
- }
- void segment_event(char type)
- {
- if(current->segment_type!=type)
- {
- if(current->segment_type!=0)
- {
- segment s;
- s.type = current->segment_type;
- s.start = current->segment_pos;
- s.length = current->samples.size()-current->segment_pos;
- current->segments.push_back(s);
- }
- current->segment_type = type;
- current->segment_pos = current->samples.size();
- }
- }
- void outputPhoSymbol(char* pho_code,int pho_type)
- {
- pho_event p;
- p.code = pho_code;
- p.type = pho_type;
- p.start = current->samples.size();
- current->pho.push_back(p);
-
- }
- void outputSilence(short int sample)
- {
- segment_event('S');
- current->samples.push_back(sample);
- }
- void outputUnvoiced(short int sample)
- {
- segment_event('U');
- current->samples.push_back(sample);
- }
- void outputVoiced(short int sample)
- {
- segment_event('V');
- current->samples.push_back(sample);
- }
- void flush()
- {
- outputPhoSymbol((char*)"#",0);
- segment_event(0);
- }
// Write `s` to `f` followed by its terminating NUL byte
// (s.length() + 1 bytes in total).
//
// The string is taken by const reference so callers are not charged a copy
// per call; literals and std::string arguments both still bind.
void write_zstring(FILE* f,const std::string& s)
{
    fwrite(s.c_str(),1,s.length()+1,f);
}
// Write the in-memory bytes of `n` (sizeof(int) of them, host byte order) to `f`.
void write_int(FILE* f,int n)
{
    const void* bytes = &n;
    fwrite(bytes, 1, sizeof n, f);
}
// Write the in-memory bytes of `n` (sizeof(short) of them, host byte order) to `f`.
void write_short(FILE* f,short n)
{
    const void* bytes = &n;
    fwrite(bytes, 1, sizeof n, f);
}
// Write the single byte `n` to `f`.
void write_char(FILE* f,char n)
{
    const void* bytes = &n;
    fwrite(bytes, 1, sizeof n, f);
}
// Write the in-memory bytes of `n` (sizeof(float) of them, host representation) to `f`.
void write_float(FILE* f,float n)
{
    const void* bytes = &n;
    fwrite(bytes, 1, sizeof n, f);
}
- void do_synth(int rate,int f0,char* lyric)
- {
- // synth with param
- espeak_SetParameter(espeakRATE, rate, 0);
-
- //synth
- espeak_ng_SetConstF0(f0);
- espeak_ng_Synthesize(lyric, 0, 0, POS_CHARACTER, 0, 0, NULL, NULL);
- flush();
- }
- void show_length(float fs)
- {
- float count = current->samples.size();
- float length = count/fs;
- current->length = length;
- }
- void find_best_one(float note_length)
- {
- bool found = false;
-
- for(size_t i=0; i<runs.size()-1; i++)
- {
- float delta0 = fabs(runs[i]->length - note_length);
- float delta1 = fabs(runs[i+1]->length - note_length);
- //printf("find best one %i %f %f",(int)i,delta0,delta1);
- if(!found && delta1 > delta0)
- {
- //printf(" found");
- current = runs[i];
- found = true;
- }
- //printf("\n");
- }
- }
-
- int main(int argc,char** argv)
- {
- if(argc<6)
- {
- printf("usage: espeak-sg voice f0 lyric rate filename [optargs..]\n");
- return 0;
- }
-
- char* voice = argv[1];
- int f0 = atoi(argv[2]);
- char* lyric = argv[3];
- int rate = atoi(argv[4]);
- char* filename = argv[5];
-
- bool have_note_length = false;
- float note_length = 0;
-
- for(int i=6;i<argc;i++)
- {
- char* optarg = argv[i];
- if(strlen(optarg)>4)
- {
- if(optarg[0]=='n' && optarg[1]=='l' && optarg[2]=='=')
- {
- note_length = boost::lexical_cast<float>(optarg+3);
- have_note_length = true;
- }
- }
- }
-
-
-
-
- init();
- espeak_ng_SetVoiceByName(voice);
-
-
-
- espeak_ng_OUTPUT_HOOKS hooks;
- hooks.outputPhoSymbol = outputPhoSymbol;
- hooks.outputSilence = outputSilence;
- hooks.outputUnvoiced = outputUnvoiced;
- hooks.outputVoiced = outputVoiced;
- espeak_ng_SetOutputHooks(&hooks);
-
- float samplerate = (float)espeak_ng_GetSampleRate();
-
- if(rate!=0)
- {
- current = new output_data;
- do_synth(rate,f0,lyric);
- }
- else
- {
- if(!have_note_length)
- {
- fprintf(stderr,"note length required\n");
- return 1;
- }
- #if 0
- SF_INFO info;
- info.samplerate = espeak_ng_GetSampleRate();
- info.channels = 1;
- info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
- info.sections = 0;
- info.frames = 0;
- info.seekable = 0;
- SNDFILE* sf = sf_open("/tmp/debug.wav",SFM_WRITE,&info);
- #endif
-
- for(int current_rate = 80; current_rate < 450; current_rate += 5)
- {
- //printf("set rate %i\n",current_rate);
- current = new output_data;
- do_synth(current_rate,f0,lyric);
- show_length(samplerate);
- runs.push_back(current);
- }
-
- find_best_one(note_length);
-
- #if 0
- sf_write_short(sf,current->samples.data(),current->samples.size());
- sf_close(sf);
- #endif
- }
-
- FILE* f = fopen(filename,"w");
- write_zstring(f,"espeak-sg");
- write_int(f,0); //version of the file format
- write_int(f,samplerate);
- write_int(f,f0);
-
-
- write_int(f,current->pho.size());
- for (auto i : current->pho)
- {
- write_zstring(f,i.code);
- write_int(f,i.type);
- write_int(f,i.start);
- }
-
- write_int(f,current->segments.size());
- for (auto i : current->segments)
- {
- write_char(f, i.type);
- write_int(f, i.start);
- write_int(f, i.length);
- }
-
- write_int(f,current->samples.size());
- fwrite(current->samples.data(),current->samples.size(),sizeof(short),f);
-
- fclose(f);
-
- return 0;
- }
|