123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222 |
- /*
- Copyright (c) 1995-2018 Faculte Polytechnique de Mons (TCTS lab)
- Copyright 2020 Tobias "Tomoko" Platen
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- #include "sekai/VoiceDefMBROLA.h"
- #include <stdio.h>
- #include <sndfile.h>
/**
 * Read a fixed-width text field from a binary stream.
 *
 * At most `length` bytes are consumed from `f`. The returned string ends at
 * the first NUL byte inside the field (the same result the previous
 * C-string conversion produced) or, if the stream ran short, at the last
 * byte that fread() actually delivered.
 *
 * @param f      stream to read from; a null stream yields an empty string.
 * @param length width of the field in bytes; values <= 0 yield an empty
 *               string and consume nothing.
 * @return the field contents up to the first NUL / end of data.
 */
static std::string readstr(FILE* f, int length) {
  if (f == nullptr || length <= 0) {
    return std::string();
  }

  // A std::string buffer replaces the former variable-length array, which
  // is a compiler extension and not standard C++.
  std::string field(static_cast<std::string::size_type>(length), '\0');
  const std::string::size_type got = fread(&field[0], 1, field.size(), f);

  // Drop the bytes fread() did not fill so a truncated file can never leak
  // indeterminate memory into the result.
  field.resize(got);

  const std::string::size_type nul = field.find('\0');
  if (nul != std::string::npos) {
    field.erase(nul);
  }
  return field;
}
/**
 * Read a NUL-terminated string from a binary stream.
 *
 * Bytes are consumed up to and including the terminating NUL; the NUL is
 * not part of the result. Reaching end-of-file (or a read error) also ends
 * the string, so a truncated file cannot make this function loop forever.
 *
 * @param f stream to read from; a null stream yields an empty string.
 * @return the bytes read before the terminator / end of data.
 */
static std::string readzstr(FILE* f) {
  std::string text;
  if (f == nullptr) {
    return text;
  }
  for (;;) {
    // Keep the result as int: narrowing to char would make EOF
    // indistinguishable from a data byte and hide the end of the stream.
    const int ch = fgetc(f);
    if (ch == EOF || ch == 0) {
      break;
    }
    text += static_cast<char>(ch);
  }
  return text;
}
/**
 * Read `count` consecutive uint16_t values from `f` into `ret`.
 *
 * The bytes are stored exactly as they appear in the stream (no byte-order
 * conversion). Elements that could not be read completely are set to 0 so
 * the caller never observes indeterminate values.
 *
 * @param f     stream to read from; null behaves like an empty stream.
 * @param ret   destination with room for `count` elements; null is a no-op.
 * @param count number of elements to read; values <= 0 are a no-op.
 */
static void read_uint16(FILE* f, uint16_t* ret, int count) {
  if (ret == nullptr || count <= 0) {
    return;
  }
  const size_t wanted = static_cast<size_t>(count);
  size_t got = 0;
  if (f != nullptr) {
    // fread() takes the element size first, then the element count.
    got = fread(ret, sizeof(*ret), wanted, f);
  }
  for (size_t i = got; i < wanted; ++i) {
    ret[i] = 0;
  }
}
/**
 * Read `count` consecutive uint32_t values from `f` into `ret`.
 *
 * The bytes are stored exactly as they appear in the stream (no byte-order
 * conversion). Elements that could not be read completely are set to 0 so
 * the caller never observes indeterminate values.
 *
 * @param f     stream to read from; null behaves like an empty stream.
 * @param ret   destination with room for `count` elements; null is a no-op.
 * @param count number of elements to read; values <= 0 are a no-op.
 */
static void read_uint32(FILE* f, uint32_t* ret, int count) {
  if (ret == nullptr || count <= 0) {
    return;
  }
  const size_t wanted = static_cast<size_t>(count);
  size_t got = 0;
  if (f != nullptr) {
    // fread() takes the element size first, then the element count.
    got = fread(ret, sizeof(*ret), wanted, f);
  }
  for (size_t i = got; i < wanted; ++i) {
    ret[i] = 0;
  }
}
/**
 * Read `count` consecutive int16_t values from `f` into `ret`.
 *
 * The bytes are stored exactly as they appear in the stream (no byte-order
 * conversion). Elements that could not be read completely are set to 0 so
 * the caller never observes indeterminate values.
 *
 * @param f     stream to read from; null behaves like an empty stream.
 * @param ret   destination with room for `count` elements; null is a no-op.
 * @param count number of elements to read; values <= 0 are a no-op.
 */
static void read_int16(FILE* f, int16_t* ret, int count) {
  if (ret == nullptr || count <= 0) {
    return;
  }
  const size_t wanted = static_cast<size_t>(count);
  size_t got = 0;
  if (f != nullptr) {
    // fread() takes the element size first, then the element count.
    got = fread(ret, sizeof(*ret), wanted, f);
  }
  for (size_t i = got; i < wanted; ++i) {
    ret[i] = 0;
  }
}
/**
 * Read `count` consecutive int32_t values from `f` into `ret`.
 *
 * The bytes are stored exactly as they appear in the stream (no byte-order
 * conversion). Elements that could not be read completely are set to 0 so
 * the caller never observes indeterminate values.
 *
 * @param f     stream to read from; null behaves like an empty stream.
 * @param ret   destination with room for `count` elements; null is a no-op.
 * @param count number of elements to read; values <= 0 are a no-op.
 */
static void read_int32(FILE* f, int32_t* ret, int count) {
  if (ret == nullptr || count <= 0) {
    return;
  }
  const size_t wanted = static_cast<size_t>(count);
  size_t got = 0;
  if (f != nullptr) {
    // fread() takes the element size first, then the element count.
    got = fread(ret, sizeof(*ret), wanted, f);
  }
  for (size_t i = got; i < wanted; ++i) {
    ret[i] = 0;
  }
}
/**
 * Read `count` consecutive bytes from `f` into `ret`.
 *
 * Bytes that could not be read (end of file or read error) are set to 0 so
 * the caller never observes indeterminate values.
 *
 * @param f     stream to read from; null behaves like an empty stream.
 * @param ret   destination with room for `count` bytes; null is a no-op.
 * @param count number of bytes to read; values <= 0 are a no-op.
 */
static void read_uint8(FILE* f, uint8_t* ret, int count) {
  if (ret == nullptr || count <= 0) {
    return;
  }
  const size_t wanted = static_cast<size_t>(count);
  size_t got = 0;
  if (f != nullptr) {
    // fread() takes the element size first, then the element count.
    got = fread(ret, sizeof(*ret), wanted, f);
  }
  for (size_t i = got; i < wanted; ++i) {
    ret[i] = 0;
  }
}
- VoiceDefMBROLA::VoiceDefMBROLA(std::string path) {
- FILE* database = fopen(path.c_str(), "rb");
- std::string magic = readstr(database, 6);
- std::string version = readstr(database, 5);
- #ifdef DUMP_DB
- printf("magic %s\n", magic.c_str());
- printf("version %s\n", version.c_str());
- #endif
- int16_t nb_diphone = 0;
- read_int16(database, &nb_diphone, 1);
- #ifdef DUMP_DB
- printf("nb_diphone %i\n", nb_diphone);
- #endif
- uint32_t sizemark = 0;
- uint16_t oldsizemark = 0;
- read_uint16(database, &oldsizemark, 1);
- if (oldsizemark == 0) {
- read_uint32(database, &sizemark, 1);
- } else {
- sizemark = oldsizemark;
- }
- #ifdef DUMP_DB
- printf("sizemark %i\n", sizemark);
- #endif
- int32_t sizeraw = 0;
- int16_t samplerate = 0;
- read_int32(database, &sizeraw, 1);
- read_int16(database, &samplerate, 1);
- #ifdef DUMP_DB
- printf("samplerate %i\n", samplerate);
- #endif
- _samplerate = samplerate;
- uint8_t mbrperiod;
- uint8_t coding;
- read_uint8(database, &mbrperiod, 1);
- read_uint8(database, &coding, 1);
- _mbrperiod = mbrperiod;
- int32_t indice_pm = 0; /* cumulated pos in pitch mark vector */
- int32_t indice_wav = 0; /* cumulated pos in the waveform dba */
- uint8_t nb_wframe; /* Physical number of frame */
- std::string new_left;
- std::string new_right;
- int16_t new_halfseg;
- uint8_t new_nb_frame;
- int32_t new_pos_pm;
- int32_t new_pos_wave;
- int i = 0;
- for (i = 0; ((int)indice_pm != (int)sizemark) && (i < nb_diphone); i++) {
- new_left = readzstr(database);
- new_right = readzstr(database);
- read_int16(database, &new_halfseg, 1);
- fread(&new_nb_frame, sizeof(new_nb_frame), 1, database);
- fread(&nb_wframe, sizeof(nb_wframe), 1, database);
- new_pos_wave = indice_wav;
- indice_wav += (long)nb_wframe * (long)mbrperiod;
- #ifdef DUMP_DB
- printf("%i Diph [[%s-%s]] poswav=%li halfseg=%li pospm=%i nbframe=%i\n", i,
- new_left.c_str(), new_right.c_str(), new_pos_wave, new_halfseg,
- new_pos_pm, new_nb_frame);
- #else
- (void)new_pos_pm;
- #endif
- std::string diph_index = new_left + "-" + new_right;
- diphone* diph = new diphone;
- diph->begin = new_pos_wave;
- diph->middle = new_pos_wave + new_halfseg;
- diph->end = new_pos_wave + mbrperiod * new_nb_frame;
- _diphone_map[diph_index] = diph;
- new_pos_pm = indice_pm;
- indice_pm += new_nb_frame;
- }
- for (; i < nb_diphone; i++) {
- std::string left = readzstr(database);
- std::string right = readzstr(database);
- std::string left2 = readzstr(database);
- std::string right2 = readzstr(database);
- #ifdef DUMP_DB
- printf("%i Diph [[%s-%s]] -> [[%s-%s]]\n", i, left.c_str(), right.c_str(),
- left2.c_str(), right2.c_str());
- #endif
- }
- // read pitchmark
- uint32_t round_size = (sizemark + 3) / 4; /* round to upper value */
- /* Compress 4 pitch marks in one byte */
- uint8_t* pmk = new uint8_t[round_size];
- fread(pmk, sizeof(uint8_t), round_size, database);
- _voice_data_length = sizeraw / sizeof(short);
- _voice_data = new short[_voice_data_length];
- fread(_voice_data, sizeof(short), _voice_data_length, database);
- #ifdef DUMP_DB
- SF_INFO info = {0};
- info.samplerate = samplerate;
- info.channels = 1;
- info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
- SNDFILE* sf = sf_open("/tmp/dump_mbrola.wav", SFM_WRITE, &info);
- sf_write_short(sf, _voice_data, _voice_data_length);
- sf_close(sf);
- #endif
- std::string end = readzstr(database);
- #ifdef DUMP_DB
- printf("end: %s", end.c_str());
- #endif
- fclose(database);
- }
- VoiceDefMBROLA::~VoiceDefMBROLA() {}
- diphone* VoiceDefMBROLA::getDiphone(std::string index) {
- return _diphone_map[index];
- }
- void VoiceDefMBROLA::getImpulseResponse(float currentTime,
- float* impulseResponse,
- int* impulseResponseLength,float morph) {
- int samples = currentTime * _samplerate;
- *impulseResponseLength = 0;
- // reference
- int index = samples / _mbrperiod;
- int offset = index * _mbrperiod;
- uint len = 2 * _mbrperiod;
- for (uint i = 0; i < len; i++) {
- if (offset + i < _voice_data_length)
- impulseResponse[i] = _voice_data[offset + i] / 32768.0 * 0.9;
- else
- impulseResponse[i] = 0;
- }
- // TODO: add unvoiced part
- *impulseResponseLength = len;
- }
- float VoiceDefMBROLA::getLength()
- {
- return 0;
- }
|