123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402 |
- /*
- This file is part of QTau
- Copyright (C) 2013-2020 Tobias "Tomoko" Platen <tplaten@posteo.de>
- Copyright (C) 2013 digited <https://github.com/digited>
- Copyright (C) 2010-2013 HAL@ShurabaP <https://github.com/haruneko>
- QTau is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- SPDX-License-Identifier: GPL-3.0+
- */
- #include "vosamp.h"
- #include "../editor/ustjkeys.h"
- #include <QDebug>
- #include <QDir>
- #include <QDirIterator>
- #include <QFile>
- #include <QFileInfo>
- #include <QJsonDocument>
- #include <QStringList>
- #include <QTextStream>
- #include <assert.h>
- #include <math.h>
- #include <sekai/MBRSynth.h>
- #include <sekai/ControlTrack.h>
- #include <sekai/midi.h>
- #include <sndfile.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <unistd.h>
- #include <QProcess>
- //TODO
- static MBRConfig config;
- #define REALTIME false
- #define STATE_IDLE 0
- #define STATE_RUNNING 1
- #define STATE_RESUME_ESPEAK 2
- #define STATE_CONVERT_SAMPLERATE 3
- //FIXME: sometimes crashes when espeak is used
- bool fileExists(QString path) {
- QFileInfo check_file(path);
- // check if file exists and if yes: Is it really a file and no directory?
- return check_file.exists() && check_file.isFile();
- }
- void runSynth(VoiceSampler* synth, QString wavFileName, int samplerate) {
- SF_INFO info;
- memset(&info,0,sizeof(info)); //XXX: create memzero macro
- info.samplerate = samplerate;
- info.channels = 1;
- info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
- SNDFILE* sf = sf_open(wavFileName.toUtf8().data(), SFM_WRITE, &info);
- while (1) {
- const int size = 1024;
- int fill = size * 4;
- float buffer_out[size];
- if (synth->readData(buffer_out, size, fill) == false) break;
- sf_write_float(sf, buffer_out, size);
- }
- }
- /// manifest
- QString VoSamp::name() { return "VoSamp"; }
- QString VoSamp::description() { return "A real time singing synthesizer"; }
- QString VoSamp::version() { return "19.10"; }
- /// setup
- void VoSamp::setup(IController* ctrl) {
- this->_ctrl = ctrl;
- this->_jack_samplerate = ctrl->sampleRate();
- #if 0
- if(_jack_samplerate!=_samplerate)
- {
- // may depend on current voicebank
- // create temp files, resample offline, warn mismatch
- }
- #endif
- // thread queue signaling
- connect(this, &VoSamp::logDebug, this, &VoSamp::on_logDebug);
- connect(this, &VoSamp::logError, this, &VoSamp::on_logError);
- connect(this, &VoSamp::logSuccess, this, &VoSamp::on_logSuccess);
- }
- bool VoSamp::setCacheDir(QString cacheDir) {
- /// ONLY some backends use this
- _cacheDir = cacheDir;
- return true;
- }
- bool VoSamp::synthIsRealtime() { return REALTIME; }
- void VoSamp::runESPEAK(QString outfile, QString lyric, int noteNum, QString voice) {
- QProcess* p = new QProcess();
- p->setProgram("espeak-sg");
- QStringList args;
- int rate=100;
- //usage: espeak-sg voice f0 lyric rate filename
- args << voice;
- int f0 = static_cast<int>(frequencyFromNote(noteNum));
- args << QVariant(f0).toString();
- args << lyric;
- args << QVariant(rate).toString();
- args << outfile;
- p->setArguments(args);
- connect(p, SIGNAL(finished(int)), this, SLOT(processFinished()));
- if (_process_count < 10) {
- _process_count++;
- p->start();
- _runningProc.push_back(p);
- } else {
- _scheduledProc.push_back(p);
- }
- }
- void VoSamp::convertSamplerate(QString infile, QString outfile, int fs) {
- QProcess* p = new QProcess();
- p->setProgram("sndfile-resample");
- QStringList args;
- args << "-to";
- args << QVariant(fs).toString();
- args << "-c";
- args << "0";
- args << infile;
- args << outfile;
- p->setArguments(args);
- connect(p, SIGNAL(finished(int)), this, SLOT(processFinished()));
- p->start();
- }
- void VoSamp::processFinished() {
- if (_scheduledProc.size() > 0) {
- QProcess* p = _scheduledProc[0];
- _scheduledProc.removeFirst();
- _process_count++;
- p->start();
- _runningProc.push_back(p);
- } else {
- QMutableListIterator<QProcess*> it(_runningProc);
- while (it.hasNext()) {
- QProcess* p = it.next();
- if (p->state() == QProcess::NotRunning) {
- delete p;
- it.remove();
- }
- }
- }
- if (_runningProc.size() == 0 && _scheduledProc.size() == 0) {
- DEVLOG_DEBUG("no processes");
- if (_state == STATE_RESUME_ESPEAK) {
- resumeESPEAK();
- }
- if (_state == STATE_CONVERT_SAMPLERATE) {
- startPlaybackSamplerate();
- }
- }
- }
- void VoSamp::startPlaybackSamplerate() {
- QString fn2 = "/tmp/qtau_synth.wav";
- _ctrl->startOfflinePlayback(fn2);
- _state = STATE_RUNNING;
- }
- void VoSamp::resumeESPEAK() {
- _timeline->fix();
- _ctrack->fix();
- MBRSynth* mbr_synth = new MBRSynth(_ctrack);
- config.type=synthType::ESPEAK;
- config.mbr_period=0;
- config.frame_period=0;
- config.fft_size=0;
- mbr_synth->setConfig(&config);
- _timeline->outputPho(mbr_synth);//crash here
- if (REALTIME == false) // not realtime
- {
- QString fn2 = "/tmp/qtau_synth.wav";
- QString fn = "/tmp/qtau_synth_espeak.wav";
- _ctrl->logSuccess("running MBRSynth in eSpeak mode");
- runSynth(mbr_synth, fn, mbr_synth->samplerate());
- _ctrl->logSuccess("start playback");
- if (mbr_synth->samplerate() == _jack_samplerate)
- _ctrl->startOfflinePlayback(fn);
- else {
- _state = STATE_CONVERT_SAMPLERATE;
- convertSamplerate(fn, fn2, _jack_samplerate);
- //_ctrl->startOfflinePlayback(fn2);
- }
- delete mbr_synth;
- }
- }
- bool VoSamp::synthesize(IScore* score) {
- _state = STATE_RUNNING;
- DEVLOG_DEBUG("VoSamp::synthesize voicepath=<" + _voicePath + ">");
- _timeline = nullptr;
- _ctrack = new ControlTrack();
- QString otoini = _voicePath + "/oto.ini";
- QString espeakjson = _voicePath + "/eSpeak.json";
- bool utau = false;
- bool espeak = false;
- if (fileExists(espeakjson)) {
- _timeline = sinsy::newTimelineESPEAK();
- QFile file;
- file.setFileName(espeakjson);
- file.open(QIODevice::ReadOnly | QIODevice::Text);
- auto val = file.readAll();
- file.close();
- QJsonDocument j = QJsonDocument::fromJson(val);
- _espeakVoice = j["voice"].toString();
- if(_espeakVoice.length()==0) _espeakVoice="de";
- espeak = true;
- } else if (fileExists(otoini) && fileExists(_voicePath + "/oto.json")) {
- _timeline = sinsy::newTimelineUTAU(otoini.toStdString(),"UTF8");
- if (_timeline->isValid()) {
- _ctrl->logSuccess("Found valid UTAU voicebank at " + _voicePath);
- }
- utau = true;
- } else {
- QString msg = "Voicebank at " + _voicePath + " is not a valid voicebank";
- _ctrl->logError(msg);
- return false;
- }
- int noteCount = score->getNoteCount();
- for (int i = 0; i < noteCount; i++) {
- auto note = score->getNote(i);
- _timeline->addLyric(note.lyric.toStdString(), note.start, note.lenght,
- note.pitch);
- // deprecate addNote -> simplify not hander
- float f0 = static_cast<float>(frequencyFromNote(note.pitch));
- if(note.pit.size()==0)
- {
- if(i==0) _ctrack->addPoint(0,f0);
- _ctrack->addPoint(note.start + 0.2f * note.lenght,f0);
- _ctrack->addPoint(note.start + 0.5f * note.lenght,f0);
- _ctrack->addPoint(note.start + 0.8f * note.lenght,f0);
- }
- else {
- for(int i=0;i<note.pit.size();i++)
- {
- auto f0 = frequencyFromNote(note.pitch+note.pit[i].y);
- //f0 += note.pit[i].y_add;
- _ctrack->addPoint(note.pit[i].x,f0);
- }
- }
- if(note.dyn.size()>=2)
- for(int i=0;i<note.dyn.size()-1;i++)
- {
- DynSegment d;
- float p0 = note.dyn[i].x;
- float p1 = note.dyn[i+1].x;
- //d.start=note.start+note.lenght*p0;
- //d.end=note.start+note.lenght*p1;
- d.start = p0;
- d.end = p1;
- d.dyn0=note.dyn[i].y;
- d.dyn1=note.dyn[i+1].y;
- _ctrack->addDynamicsSegment(d);
- DEVLOG_DEBUG("setDYN"<<p0<<p1<<d.dyn0<<d.dyn1);
- }
- }
- if (espeak) {
- _ctrl->logSuccess("generate units for espeak");
- // generate units
- int count = _timeline->getEventCount();
- //bool update = false;
- for (int i = 0; i < count; i++) {
- QString lyric = QString::fromStdString(_timeline->getLyric(i));
- int pitch = _timeline->getPitch(i, false);
- QString pitch2 = QVariant(pitch).toString();
- QString voice = _espeakVoice;
- //+ optional voice variant
- QString phofile = _voicePath + "/_u" + pitch2 + "_v"+ voice + "_" + lyric + ".pho";
- if (!fileExists(phofile)) {
- runESPEAK(phofile, lyric, pitch, voice);
- _state = STATE_RESUME_ESPEAK;
- }
- // set pho file path
- _timeline->setParam(i, "FileName", phofile.toStdString());
- }
- if (_state == STATE_RESUME_ESPEAK) return true;
- }
- if (espeak) resumeESPEAK();
- if (utau) {
- _timeline->fix();
- _ctrack->fix();
- // works but needs refactoring
- // UTAU synth: reads json
- MBRSynth* utau_synth = new MBRSynth(_ctrack);
- utau_synth->setBasedir(_voicePath.toStdString());
- config.type=synthType::UTAU;
- config.mbr_period=0;
- config.frame_period=0;
- config.fft_size=0;
- utau_synth->setConfig(&config);
- _timeline->outputPho(utau_synth);
- if (REALTIME == false) // not realtime
- {
- QString fn = "/tmp/qtau_synth.wav";
- _ctrl->logSuccess("running MBRSynth in UTAU mode");
- runSynth(utau_synth, fn, utau_synth->samplerate());
- _ctrl->logSuccess("start playback");
- _ctrl->startOfflinePlayback(fn);
- delete utau_synth;
- return true;
- }
- }
- return false;
- }
- //
- int VoSamp::readData(float* data, int size) {
- if (_samp) {
- bool has_data = _samp->readData(data, size, _samp_fill);
- if (has_data == false) return 0;
- }
- return size;
- }
- /// phoneme transformation, todo write plugin runner, define phrase api
- /// TODO implement later
- QString VoSamp::getTranscription(QString txt) {
- return txt; // extenal NLP (python plugin, sync)
- }
- bool VoSamp::doPhonemeTransformation(QStringList& list) {
- // TODO lyrizer
- DEVLOG_DEBUG(STR(list.count()));
- return false;
- }
- /// voice list (fix this there is only one)
- bool VoSamp::setVoicePath(QString voicePath) {
- _voicePath = voicePath;
- //TODO: read oto.json
- return true;
- }
- /// logging (helper) (refactor this)
- void VoSamp::on_logDebug(QString debug) { _ctrl->logDebug(debug); }
- void VoSamp::on_logError(QString error) { _ctrl->logError(error); }
- void VoSamp::on_logSuccess(QString success) { _ctrl->logSuccess(success); }
- //
|