123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198 |
- /*
- Copyright 2020 Tobias "Tomoko" Platen
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- #include "sekai/VoiceDefESPEAK.h"
- #include <fstream>
- #include <iostream>
- #include <sstream>
- #include <boost/algorithm/string/classification.hpp>
- #include <boost/algorithm/string/split.hpp>
- #include <boost/lexical_cast.hpp>
- #include <stdio.h>
// Reads up to `length` bytes from `f` and returns them as a string.
//
// The result is cut at the first NUL byte inside the bytes read, which is the
// same value the previous C-string based implementation produced. When the
// stream ends early only the bytes actually read are returned, so the caller
// never sees uninitialised memory. A non-positive `length` yields "".
static std::string readstr(FILE* f, int length) {
    if (length <= 0) {
        return std::string();
    }

    // Heap-backed buffer instead of a variable-length array (VLAs are a
    // compiler extension in C++ and can overflow the stack for large lengths).
    std::string buffer(static_cast<size_t>(length), '\0');
    const size_t got = fread(&buffer[0], 1, buffer.size(), f);
    buffer.resize(got);

    // Keep C-string semantics: everything after an embedded NUL is dropped.
    const std::string::size_type nul = buffer.find('\0');
    if (nul != std::string::npos) {
        buffer.resize(nul);
    }
    return buffer;
}
// Reads bytes from `f` up to (and consuming) the next NUL byte and returns
// them without the terminator.
//
// Reading also stops at end-of-file or on a read error; whatever was
// collected so far is returned. The fgetc() result is kept in an `int` so
// that EOF can be told apart from data bytes -- storing it in a `char` made
// the loop spin forever on a truncated or unterminated file.
static std::string readzstr(FILE* f) {
    std::string text;
    for (;;) {
        const int ch = fgetc(f);
        if (ch == EOF || ch == 0) {
            break;
        }
        text += static_cast<char>(ch);
    }
    return text;
}
// Reads `count` 16-bit unsigned values from `f` into `ret`.
//
// Bytes are copied as stored in the file; no byte-order conversion is done.
// Elements that could not be read completely (short file, read error) are set
// to 0 so the caller never works with indeterminate data. Nothing is read
// when `ret` is null or `count` is not positive.
static void read_uint16(FILE* f, uint16_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
        return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread(ptr, element size, element count, stream): with the arguments in
    // this order the return value is the number of whole elements read.
    const size_t got = fread(ret, sizeof(*ret), wanted, f);
    for (size_t i = got; i < wanted; ++i) {
        ret[i] = 0;
    }
}
// Reads `count` 32-bit unsigned values from `f` into `ret`.
//
// Bytes are copied as stored in the file; no byte-order conversion is done.
// Elements that could not be read completely (short file, read error) are set
// to 0 so the caller never works with indeterminate data. Nothing is read
// when `ret` is null or `count` is not positive.
static void read_uint32(FILE* f, uint32_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
        return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread(ptr, element size, element count, stream): with the arguments in
    // this order the return value is the number of whole elements read.
    const size_t got = fread(ret, sizeof(*ret), wanted, f);
    for (size_t i = got; i < wanted; ++i) {
        ret[i] = 0;
    }
}
// Reads `count` 16-bit signed values from `f` into `ret`.
//
// Bytes are copied as stored in the file; no byte-order conversion is done.
// Elements that could not be read completely (short file, read error) are set
// to 0 so the caller never works with indeterminate data. Nothing is read
// when `ret` is null or `count` is not positive.
static void read_int16(FILE* f, int16_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
        return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread(ptr, element size, element count, stream): with the arguments in
    // this order the return value is the number of whole elements read.
    const size_t got = fread(ret, sizeof(*ret), wanted, f);
    for (size_t i = got; i < wanted; ++i) {
        ret[i] = 0;
    }
}
// Reads `count` 32-bit signed values from `f` into `ret`.
//
// Bytes are copied as stored in the file; no byte-order conversion is done.
// Elements that could not be read completely (short file, read error) are set
// to 0 so the caller never works with indeterminate data. Nothing is read
// when `ret` is null or `count` is not positive.
static void read_int32(FILE* f, int32_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
        return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread(ptr, element size, element count, stream): with the arguments in
    // this order the return value is the number of whole elements read.
    const size_t got = fread(ret, sizeof(*ret), wanted, f);
    for (size_t i = got; i < wanted; ++i) {
        ret[i] = 0;
    }
}
// Reads `count` bytes from `f` into `ret`.
//
// Bytes that could not be read (short file, read error) are set to 0 so the
// caller never works with indeterminate data. Nothing is read when `ret` is
// null or `count` is not positive.
static void read_uint8(FILE* f, uint8_t* ret, int count) {
    if (ret == nullptr || count <= 0) {
        return;
    }
    const size_t wanted = static_cast<size_t>(count);
    // fread(ptr, element size, element count, stream): with the arguments in
    // this order the return value is the number of whole elements read.
    const size_t got = fread(ret, sizeof(*ret), wanted, f);
    for (size_t i = got; i < wanted; ++i) {
        ret[i] = 0;
    }
}
- VoiceDefESPEAK::VoiceDefESPEAK(std::string path) {
-
- FILE* f = fopen(path.c_str(), "rb");
-
- if(f==nullptr)
- {
- throw std::runtime_error("VoiceDefESPEAK invalid file "+path);
- }
-
- std::string header = readzstr(f);
-
- uint32_t version;
- uint32_t f0;
-
- read_uint32(f,&version,1);
- read_uint32(f,&_samplerate,1);
- read_uint32(f,&f0,1);
-
- _period = _samplerate*1.0/f0;
-
- uint32_t pho_length;
- uint32_t segments_length;
- uint32_t samples_length;
-
-
- read_uint32(f,&pho_length,1);
- //printf("pho %i\n",pho_length);
- for (uint32_t i=0;i<pho_length;i++)
- {
- pho_event e;
- e.code = readzstr(f);
- read_uint32(f,&e.type,1);
- read_uint32(f,&e.start,1);
- _pho_events.push_back(e);
- }
-
- read_uint32(f,&segments_length,1);
- //printf("seg %i\n",segments_length);
- for (uint32_t i=0;i<segments_length;i++)
- {
- segment s;
- read_uint8(f,&s.type,1);
- read_uint32(f,&s.start,1);
- read_uint32(f,&s.length,1);
- _segments.push_back(s);
- }
-
- read_uint32(f,&samples_length,1);
- //printf("samp %i\n",samples_length);
- _samples.resize(samples_length);
- read_int16(f,_samples.data(),samples_length);
-
-
-
-
- fclose(f);
-
- }
- VoiceDefESPEAK::~VoiceDefESPEAK() {}
- void VoiceDefESPEAK::getImpulseResponse(float currentTime,
- float *impulseResponse,
- int *impulseResponseLength,float morph) {
- int samples = currentTime * _samplerate;
- *impulseResponseLength = 0;
- for (uint i = 0; i < _segments.size(); i++) {
- uint32_t start = _segments[i].start;
- uint32_t end = _segments[i].start+_segments[i].length;
- if (samples >= start && samples < end) {
-
- //printf("found segment %i %c %i\n",i,_segments[i].type,samples);
-
- if(_segments[i].type=='V')
- {
- uint vstart = start;
- uint mbr_period = (uint)(_period + 0.5);
- uint index = (samples - vstart) / mbr_period;
- uint offset = index * mbr_period + vstart;
- uint len = 2 * mbr_period;
-
- for (uint j = 0; j < len; j++) {
- if (offset + j < _samples.size())
- {
- impulseResponse[j] = _samples[offset+j] * 1.0 / 32768;
- }
- else
- impulseResponse[j] = 0;
- }
-
- *impulseResponseLength = len;
- return;
- }
- else
- {
-
- }
-
-
-
-
- }
- }
- }
- float VoiceDefESPEAK::getLength()
- {
- return _samples.size()*1.0/_samplerate;
- }
- std::string VoiceDefESPEAK::getPhoLine(int index)
- {
- if(index>=0 && index < (int)_pho_events.size()-1)
- {
- std::stringstream ss;
-
-
- float a = _pho_events[index].start*1.0/_samplerate;
- float b = _pho_events[index+1].start*1.0/_samplerate;
-
- char* ototypes[]= {"PAUSE","STRESS","VOWEL","LIQUID","STOP","VSTOP","FRICATIVE","VFRICATIVE","NASAL","VIRTUAL","DELETED","INVALID"};
-
- ss << a << ' ' << b << ' ' << _pho_events[index].code << ' ' << ototypes[_pho_events[index].type]; //FIXME type info ??
- return ss.str();
- }
- return "";
- }
|