123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515 |
- /*
- This file is part of QTau
- Copyright (C) 2013-2018 Tobias "Tomoko" Platen <tplaten@posteo.de>
- Copyright (C) 2013 digited <https://github.com/digited>
- Copyright (C) 2010-2013 HAL@ShurabaP <https://github.com/haruneko>
- QTau is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- SPDX-License-Identifier: GPL-3.0+
- */
- #include "utau_synth.h"
- #include "utaudb.h"
- #include "sekai/midi.h"
- #include "sekai/SekaiContext.h"
- #include "sekai/vvd.h"
- #include "sekai/mfcc.h"
- #include "world/constantnumbers.h"
- #include <QFileInfo>
- #include <unistd.h>
- #include <assert.h>
- #include <sndfile.h>
- #include <samplerate.h>
- #include <math.h>
- #include <stdlib.h>
- #include <QDebug>
- #include <stdio.h>
- #include <assert.h>
- #include <vector>
- #include <iostream>
- #include <../editor/ustjkeys.h>
- #include <QStringList>
- #include <QJsonDocument>
- #define __devloglevel__ 5
- #include <QDir>
- #include <QFile>
- #include <QTextStream>
- #include <QDebug>
- #include <QDirIterator>
- #include <sekai/common.h>
- #include <sekai/WorldSynth2.h>
- #include "otoini.h"
- #include <QtConcurrent/QtConcurrent>
- bool fileExists(QString path) {
- QFileInfo check_file(path);
- // check if file exists and if yes: Is it really a file and no directory?
- return check_file.exists() && check_file.isFile();
- }
- QString UTAUSynth::name() { return "UTAUSynth"; }
- QString UTAUSynth::description() { return "a Japanese singing synthesizer"; }
- QString UTAUSynth::version() { return "18.04"; }
- bool UTAUSynth::synthIsRealtime() { return false; }
- void UTAUSynth::setup(IController* ctrl) {
- this->_ctrl = ctrl;
- this->_jack_samplerate = ctrl->sampleRate();
- _kana2romaji["あ"]="a";_romaji2kana["a"]="あ";
- _kana2romaji["い"]="i";_romaji2kana["i"]="い";
- _kana2romaji["う"]="u";_romaji2kana["u"]="う";
- _kana2romaji["え"]="e";_romaji2kana["e"]="え";
- _kana2romaji["お"]="o";_romaji2kana["o"]="お";
- _kana2romaji["か"]="ka";_romaji2kana["ka"]="か";
- _kana2romaji["き"]="ki";_romaji2kana["ki"]="き";
- _kana2romaji["く"]="ku";_romaji2kana["ku"]="く";
- _kana2romaji["け"]="ke";_romaji2kana["ke"]="け";
- _kana2romaji["こ"]="ko";_romaji2kana["ko"]="こ";
- _kana2romaji["きゃ"]="kya";_romaji2kana["kya"]="きゃ";
- _kana2romaji["きゅ"]="kyu";_romaji2kana["kyu"]="きゅ";
- _kana2romaji["きょ"]="kyo";_romaji2kana["kyo"]="きょ";
- _kana2romaji["さ"]="sa";_romaji2kana["sa"]="さ";
- _kana2romaji["し"]="shi";_romaji2kana["shi"]="し";
- _kana2romaji["す"]="su";_romaji2kana["su"]="す";
- _kana2romaji["せ"]="se";_romaji2kana["se"]="せ";
- _kana2romaji["そ"]="so";_romaji2kana["so"]="そ";
- _kana2romaji["しゃ"]="sha";_romaji2kana["sha"]="しゃ";
- _kana2romaji["しゅ"]="shu";_romaji2kana["shu"]="しゅ";
- _kana2romaji["しょ"]="sho";_romaji2kana["sho"]="しょ";
- _kana2romaji["た"]="ta";_romaji2kana["ta"]="た";
- _kana2romaji["ち"]="chi";_romaji2kana["chi"]="ち";
- _kana2romaji["つ"]="tsu";_romaji2kana["tsu"]="つ";
- _kana2romaji["て"]="te";_romaji2kana["te"]="て";
- _kana2romaji["と"]="to";_romaji2kana["to"]="と";
- _kana2romaji["ちゃ"]="cha";_romaji2kana["cha"]="ちゃ";
- _kana2romaji["ちゅ"]="chu";_romaji2kana["chu"]="ちゅ";
- _kana2romaji["ちょ"]="cho";_romaji2kana["cho"]="ちょ";
- _kana2romaji["な"]="na";_romaji2kana["na"]="な";
- _kana2romaji["に"]="ni";_romaji2kana["ni"]="に";
- _kana2romaji["ぬ"]="nu";_romaji2kana["nu"]="ぬ";
- _kana2romaji["ね"]="ne";_romaji2kana["ne"]="ね";
- _kana2romaji["の"]="no";_romaji2kana["no"]="の";
- _kana2romaji["にゃ"]="nya";_romaji2kana["nya"]="にゃ";
- _kana2romaji["にゅ"]="nyu";_romaji2kana["nyu"]="にゅ";
- _kana2romaji["にょ"]="nyo";_romaji2kana["nyo"]="にょ";
- _kana2romaji["は"]="ha";_romaji2kana["ha"]="は";
- _kana2romaji["ひ"]="hi";_romaji2kana["hi"]="ひ";
- _kana2romaji["ふ"]="fu";_romaji2kana["fu"]="ふ";
- _kana2romaji["へ"]="he";_romaji2kana["he"]="へ";
- _kana2romaji["ほ"]="ho";_romaji2kana["ho"]="ほ";
- _kana2romaji["ひゃ"]="hya";_romaji2kana["hya"]="ひゃ";
- _kana2romaji["ひゅ"]="hyu";_romaji2kana["hyu"]="ひゅ";
- _kana2romaji["ひょ"]="hyo";_romaji2kana["hyo"]="ひょ";
- _kana2romaji["ま"]="ma";_romaji2kana["ma"]="ま";
- _kana2romaji["み"]="mi";_romaji2kana["mi"]="み";
- _kana2romaji["む"]="mu";_romaji2kana["mu"]="む";
- _kana2romaji["め"]="me";_romaji2kana["me"]="め";
- _kana2romaji["も"]="mo";_romaji2kana["mo"]="も";
- _kana2romaji["みゃ"]="mya";_romaji2kana["mya"]="みゃ";
- _kana2romaji["みゅ"]="myu";_romaji2kana["myu"]="みゅ";
- _kana2romaji["みょ"]="myo";_romaji2kana["myo"]="みょ";
- _kana2romaji["や"]="ya";_romaji2kana["ya"]="や";
- _kana2romaji["ゆ"]="yu";_romaji2kana["yu"]="ゆ";
- _kana2romaji["よ"]="yo";_romaji2kana["yo"]="よ";
- _kana2romaji["ら"]="ra";_romaji2kana["ra"]="ら";
- _kana2romaji["り"]="ri";_romaji2kana["ri"]="り";
- _kana2romaji["る"]="ru";_romaji2kana["ru"]="る";
- _kana2romaji["れ"]="re";_romaji2kana["re"]="れ";
- _kana2romaji["ろ"]="ro";_romaji2kana["ro"]="ろ";
- _kana2romaji["りゃ"]="rya";_romaji2kana["rya"]="りゃ";
- _kana2romaji["りゅ"]="ryu";_romaji2kana["ryu"]="りゅ";
- _kana2romaji["りょ"]="ryo";_romaji2kana["ryo"]="りょ";
- _kana2romaji["わ"]="wa";_romaji2kana["wa"]="わ";
- _kana2romaji["を"]="wo";_romaji2kana["wo"]="を";
- _kana2romaji["が"]="ga";_romaji2kana["ga"]="が";
- _kana2romaji["ぎ"]="gi";_romaji2kana["gi"]="ぎ";
- _kana2romaji["ぐ"]="gu";_romaji2kana["gu"]="ぐ";
- _kana2romaji["げ"]="ge";_romaji2kana["ge"]="げ";
- _kana2romaji["ご"]="go";_romaji2kana["go"]="ご";
- _kana2romaji["ぎゃ"]="gya";_romaji2kana["gya"]="ぎゃ";
- _kana2romaji["ぎゅ"]="gyu";_romaji2kana["gyu"]="ぎゅ";
- _kana2romaji["ぎょ"]="gyo";_romaji2kana["gyo"]="ぎょ";
- _kana2romaji["ざ"]="za";_romaji2kana["za"]="ざ";
- _kana2romaji["じ"]="ji";_romaji2kana["ji"]="じ";
- _kana2romaji["ず"]="zu";_romaji2kana["zu"]="ず";
- _kana2romaji["ぜ"]="ze";_romaji2kana["ze"]="ぜ";
- _kana2romaji["ぞ"]="zo";_romaji2kana["zo"]="ぞ";
- _kana2romaji["じゃ"]="ja";_romaji2kana["ja"]="じゃ";
- _kana2romaji["じゅ"]="ju";_romaji2kana["ju"]="じゅ";
- _kana2romaji["じょ"]="jo";_romaji2kana["jo"]="じょ";
- _kana2romaji["だ"]="da";_romaji2kana["da"]="だ";
- _kana2romaji["で"]="de";_romaji2kana["de"]="で";
- _kana2romaji["ど"]="do";_romaji2kana["do"]="ど";
- _kana2romaji["ば"]="ba";_romaji2kana["ba"]="ば";
- _kana2romaji["び"]="bi";_romaji2kana["bi"]="び";
- _kana2romaji["ぶ"]="bu";_romaji2kana["bu"]="ぶ";
- _kana2romaji["べ"]="be";_romaji2kana["be"]="べ";
- _kana2romaji["ぼ"]="bo";_romaji2kana["bo"]="ぼ";
- _kana2romaji["びゃ"]="bya";_romaji2kana["bya"]="びゃ";
- _kana2romaji["びゅ"]="byu";_romaji2kana["byu"]="びゅ";
- _kana2romaji["びょ"]="byo";_romaji2kana["byo"]="びょ";
- _kana2romaji["ぱ"]="pa";_romaji2kana["pa"]="ぱ";
- _kana2romaji["ぴ"]="pi";_romaji2kana["pi"]="ぴ";
- _kana2romaji["ぷ"]="pu";_romaji2kana["pu"]="ぷ";
- _kana2romaji["ぺ"]="pe";_romaji2kana["pe"]="ぺ";
- _kana2romaji["ぽ"]="po";_romaji2kana["po"]="ぽ";
- _kana2romaji["ぴゃ"]="pya";_romaji2kana["pya"]="ぴゃ";
- _kana2romaji["ぴゅ"]="pyu";_romaji2kana["pyu"]="ぴゅ";
- _kana2romaji["ぴょ"]="pyo";_romaji2kana["pyo"]="ぴょ";
- _kana2romaji["ん"]="n";_romaji2kana["n"]="ん";
- _synth = new WorldSynth2(1024*16,1024*2,_jack_samplerate);//FIXME do not hardcode buffer lengths and FFT size
- connect(this,&UTAUSynth::logDebug,this,&UTAUSynth::on_logDebug);
- connect(this,&UTAUSynth::logError,this,&UTAUSynth::on_logError);
- connect(this,&UTAUSynth::logSuccess,this,&UTAUSynth::on_logSuccess);
- connect(this,&UTAUSynth::endOfThread,this,&UTAUSynth::on_endOfThread);
- }
- bool UTAUSynth::setScore(const QJsonArray &s)
- {
- if(_threadRunning) return false;
- _score = s;
- return true;
- }
- bool UTAUSynth::synthesize()
- {
- if(_threadRunning) return false;
- _threadRunning = true;
- if(_synth->currentTime())
- {
- _synth->reset();
- }
- _segments.clear();
- buildScore();
- QtConcurrent::run(this,&UTAUSynth::synthThread);
- return true; //will be scheduled if true
- }
- void UTAUSynth::synthThread()
- {
- SF_INFO info;
- memset(&info,0,sizeof(info));
- info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
- info.samplerate = _jack_samplerate;
- info.channels = 1;
- SNDFILE* sndfile = sf_open("/tmp/utau_synth.wav",SFM_WRITE,&info);
- int data_count = 1024;
- float* data = new float[data_count];
- while(1)
- {
- int result = readData(data,data_count);
- if(result) break;
- sf_write_float(sndfile,data,data_count);
- }
- sf_close(sndfile);
- delete[] data;
- emit endOfThread();
- }
- int UTAUSynth::readData(float *data, int size)
- {
- int fs = _jack_samplerate;
- if(size==0)
- {
- return 1;
- }
- for(int i=0;i<50;i++)
- {
- float current_f0 = 0;
- float current_time = _synth->currentTime()*1.0/fs;
- utau_note* note = nullptr;
- float end = _segments[_segments.length()-1]->end;
- for(int i=0;i<_segments.count();i++)
- {
- if(current_time >= _segments[i]->start && current_time < _segments[i]->end) { note=_segments[i]; break; }
- }
- if(current_time > end) return 1;
- if(note && note->oto)
- {
- // oto means sound
- oto_entry* oto = note->oto;
- int cepstrum_length=_currentUTAU->_reader->getCepstrumLength();
- float vvddata[cepstrum_length*2+1];
- current_f0 = note->f0;
- float x[3] = {note->start,
- note->start+oto->consonant,
- note->end};
- _currentUTAU->_reader->selectVVD(oto->vvd_index);
- float l = _currentUTAU->_reader->getSelectedLength();
- float c = l-oto->cutoff; //default case
- //c =
- float y[3] = {oto->offset,oto->offset+oto->consonant,c};
- float frame_period = _currentUTAU->_reader->getFramePeriod();
- float pos2 = interp_linear(x,y,3,current_time);
- float index = pos2 * 1000.0 / frame_period;
- bool valid = _currentUTAU->_reader->getSegment(index,vvddata);
- if(!valid) DEVLOG_ERROR("invalid segment");
- float* mel_cepstrum1 = &vvddata[1];
- float* mel_cepstrum2 = &vvddata[1+cepstrum_length];
- _synth->setF0(current_f0);
- _synth->setFrame(mel_cepstrum1,mel_cepstrum2,cepstrum_length);
- }
- else
- {
- //produce silence
- _synth->setF0(0);
- _synth->setSilence();
- }
- _synth->doSynth();
- if(_synth->isFilled(1024*2+size)) {
- _synth->pop(data,size);
- return 0;
- }
- }
- DEVLOG_ERROR("not a bug");
- abort();
- }
- void UTAUSynth::buildScore()
- {
- _segments.clear();//FIXME memory leak
- //UTAU has one segment per note
- DEVLOG_DEBUG("buildscore: "+STR(_score.count()));
- int lastNoteEnd=0;
- float tempo=0;
- float offset=0; //in seconds
- for (int i = 0; i < _score.count(); ++i)
- {
- auto o = _score[i].toObject();
- if(!o.contains(NOTE_KEY_NUMBER)) {
- tempo=o[TEMPO].toInt();
- continue;
- }
- int noteOffset = o[NOTE_PULSE_OFFSET].toInt();
- int noteLength = o[NOTE_PULSE_LENGTH].toInt();
- QString lyric = o[NOTE_LYRIC].toString();
- int notenum = o[NOTE_KEY_NUMBER].toInt();
- //lookup lyric -- if invalid :: create silence segment
- int rest = noteOffset-lastNoteEnd;
- if(rest<0)
- {
- DEVLOG_ERROR("overlapping notes");
- //return error
- }
- else if(rest>0)
- {
- utau_note* note = new utau_note;
- note->start=offset;
- float length_seconds = rest*60.0/tempo/480.0;
- offset += length_seconds;
- note->end=offset;
- note->lyric="";
- note->oto = nullptr;
- _segments.append(note);
- }
- utau_note* note = new utau_note;
- note->start=offset;
- float length_seconds = noteLength*60.0/tempo/480.0;
- offset += length_seconds;
- note->end=offset;
- note->lyric=lyric;
- note->f0 = frequencyFromNote(notenum);
- note->oto = _currentUTAU->getEntryByLyric(note->lyric);
- lastNoteEnd= noteOffset+noteLength;
- _segments.append(note);
- }
- }
- QString UTAUSynth::getTranscription(QString txt)
- {
- if(txt.split(" [").length()==2)
- return txt;
- if(_currentUTAU==nullptr)
- return txt;
- oto_entry* ent = _currentUTAU->getEntryByLyric(txt.trimmed());
- if(ent)
- {
- QString tmp = ent->name;
- if(_romaji2kana.keys().contains(tmp))
- tmp = _romaji2kana[tmp];//assume all UTAUs are Japanese
- return txt+"["+tmp+"]";
- }
- return txt;
- }
- bool UTAUSynth::doPhonemeTransformation(QStringList& list)
- {
- (void) list;
- return false;
- }
- bool UTAUSynth::setVoice(QString voiceName)
- {
- if(_voices.keys().contains(voiceName))
- {
- _currentUTAU = _voices[voiceName];
- return true;
- }
- else
- {
- _currentUTAU = nullptr;
- return false;
- }
- }
- QStringList UTAUSynth::listVoices()
- {
- if(_voices.keys().length()==0) scanUtauDir();
- return _voices.keys();
- }
- //??
- void UTAUSynth::on_logError(QString error)
- {
- _ctrl->logError(error);
- }
- void UTAUSynth::on_logSuccess(QString success)
- {
- _ctrl->logSuccess(success);
- }
- void UTAUSynth::on_logDebug(QString debug)
- {
- _ctrl->logDebug(debug);
- }
- void UTAUSynth::on_endOfThread()
- {
- _threadRunning = false;
- _ctrl->startOfflinePlayback("/tmp/utau_synth.wav");
- }
- bool UTAUSynth::setCacheDir(QString cacheDir)
- {
- (void) cacheDir;
- return true;
- }
- void UTAUSynth::scanUtauDir()
- {
- QStringList ret;
- QStringList searchPaths;
- searchPaths << QDir::home().filePath(".local/share/utau/voice");
- searchPaths << "/usr/share/utau/voice";
- foreach(QString searchPath, searchPaths)
- {
- QDir dir(searchPath);
- QDirIterator it(dir);
- while (it.hasNext())
- {
- QString vdir = it.next();
- if(QFileInfo(dir,vdir).isDir())
- {
- if(QFileInfo(dir,vdir+"/voices.json").isFile())
- {
- QFile jsonFile(dir.absoluteFilePath(vdir+"/voices.json"));
- if (jsonFile.open(QFile::ReadOnly))
- {
- QJsonDocument doc = QJsonDocument::fromJson(jsonFile.readAll());
- if(doc.isArray())
- {
- QJsonArray a = doc.array();
- for (int i = 0; i < a.count(); ++i)
- {
- QJsonObject o = a[i].toObject();
- QString name = o["name"].toString();
- if(o.keys().contains("utausynth_flags"))
- {
- UTAUDB* db = new UTAUDB(vdir);
- if(db->_reader->getSamplerate()==_jack_samplerate)
- {
- _voices[name] = db;
- }
- else
- {
- //this->logError("voice "+name+"has a different samplerate than jack, your utau resampler will fail");
- DEVLOG_DEBUG("not adding voice");
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
|