necklace
/
qtau
forked from isengaara/qtau


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
							/*
    This file is part of QTau
    Copyright (C) 2013-2020  Tobias "Tomoko" Platen <tplaten@posteo.de>
    Copyright (C) 2013       digited       <https://github.com/digited>
    Copyright (C) 2010-2013  HAL@ShurabaP  <https://github.com/haruneko>

    QTau is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

    SPDX-License-Identifier: GPL-3.0+
*/

#include "vosamp.h"
#include "../editor/ustjkeys.h"

#include <QDebug>
#include <QDir>
#include <QDirIterator>
#include <QFile>
#include <QFileInfo>
#include <QJsonDocument>
#include <QStringList>
#include <QTextStream>

#include <assert.h>
#include <math.h>
#include <sekai/MBRSynth.h>
#include <sekai/ControlTrack.h>
#include <sekai/midi.h>
#include <sndfile.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <QProcess>

//TODO
// Synth configuration shared by the eSpeak and UTAU code paths in this file;
// each path overwrites its fields before passing a pointer to
// MBRSynth::setConfig().
static MBRConfig config;

// Value reported by VoSamp::synthIsRealtime(). While it is false the synth is
// rendered into a WAV file which is then handed to offline playback.
#define REALTIME false

// Values stored in VoSamp::_state.
// STATE_IDLE is not assigned anywhere in the code visible in this file.
#define STATE_IDLE 0
// Set at the start of synthesize() and once resampled playback was started.
#define STATE_RUNNING 1
// Set while espeak-sg processes are still generating .pho files;
// processFinished() calls resumeESPEAK() once no process is left.
#define STATE_RESUME_ESPEAK 2
// Set while sndfile-resample converts the rendered file; processFinished()
// calls startPlaybackSamplerate() once no process is left.
#define STATE_CONVERT_SAMPLERATE 3

//FIXME: sometimes crashes when espeak is used

/// Returns true when @p path names an existing regular file.
/// Directories (and missing entries) yield false.
bool fileExists(QString path) {
  const QFileInfo entry(path);
  if (!entry.exists()) return false;
  return entry.isFile();
}


/// Renders the output of @p synth into a mono 16-bit PCM WAV file.
///
/// Blocks of 1024 samples are pulled from the synth until readData() reports
/// that no more data is available; the block for which readData() returns
/// false is not written.
///
/// @param synth        sampler to pull audio from
/// @param wavFileName  path of the WAV file to create
/// @param samplerate   sample rate written into the WAV header
///
/// If the file cannot be opened a warning is logged and nothing is rendered.
void runSynth(VoiceSampler* synth, QString wavFileName, int samplerate) {
  SF_INFO info = {};  // libsndfile expects unused fields to be zero
  info.samplerate = samplerate;
  info.channels = 1;
  info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;

  SNDFILE* sf = sf_open(wavFileName.toUtf8().constData(), SFM_WRITE, &info);
  if (sf == nullptr) {
    qWarning() << "runSynth: cannot open" << wavFileName
               << "for writing:" << sf_strerror(nullptr);
    return;
  }

  const int size = 1024;
  float buffer_out[size];

  while (true) {
    int fill = size * 4;  // reset for every block, as readData() may modify it
    if (synth->readData(buffer_out, size, fill) == false) break;
    sf_write_float(sf, buffer_out, size);
  }

  // Closing flushes pending samples and finalises the WAV header; without it
  // the file handle leaks and the file may be incomplete when played back.
  sf_close(sf);
}


/// manifest
/// Plugin name shown to the host.
QString VoSamp::name() {
  return QString("VoSamp");
}
/// One-line human readable description of the plugin.
QString VoSamp::description() {
  return QString("A real time singing synthesizer");
}
/// Plugin version string.
QString VoSamp::version() {
  return QString("19.10");
}

/// setup
/// Stores the controller, remembers its sample rate and wires the log signals
/// of this object to the slots that forward them to the controller.
void VoSamp::setup(IController* ctrl) {
  _ctrl = ctrl;
  _jack_samplerate = ctrl->sampleRate();

  // Not implemented yet: when _jack_samplerate differs from _samplerate
  // (which may depend on the current voicebank), create temp files, resample
  // offline and warn about the mismatch.

  // thread queue signaling
  connect(this, &VoSamp::logDebug, this, &VoSamp::on_logDebug);
  connect(this, &VoSamp::logError, this, &VoSamp::on_logError);
  connect(this, &VoSamp::logSuccess, this, &VoSamp::on_logSuccess);
}

/// Remembers the cache directory (only some backends make use of it).
/// Always returns true.
bool VoSamp::setCacheDir(QString cacheDir) {
  this->_cacheDir = cacheDir;
  return true;
}

/// Reports the compile-time REALTIME setting.
bool VoSamp::synthIsRealtime() {
  return REALTIME;
}

/// Creates an `espeak-sg` process that writes the phoneme file for one lyric.
///
/// The process is started immediately while fewer than 10 processes have been
/// counted in _process_count; otherwise it is queued in _scheduledProc and
/// started later by processFinished().
///
/// @param outfile  file espeak-sg should write
/// @param lyric    text to transcribe
/// @param noteNum  note number; converted to an integer frequency for espeak-sg
/// @param voice    espeak voice name
void VoSamp::runESPEAK(QString outfile, QString lyric, int noteNum, QString voice) {
  const int rate = 100;
  const int f0 = static_cast<int>(frequencyFromNote(noteNum));

  // usage: espeak-sg voice f0 lyric rate filename
  QStringList args;
  args << voice << QString::number(f0) << lyric << QString::number(rate) << outfile;

  QProcess* p = new QProcess();
  p->setProgram("espeak-sg");
  p->setArguments(args);

  // Compile-time checked connection to the non-deprecated finished() overload.
  connect(p, QOverload<int, QProcess::ExitStatus>::of(&QProcess::finished),
          this, &VoSamp::processFinished);

  // A process that cannot be launched never emits finished(); report it so
  // the failure is visible instead of the synth silently waiting forever.
  connect(p, &QProcess::errorOccurred, this,
          [this, p, lyric](QProcess::ProcessError) {
            _ctrl->logError("espeak-sg failed for lyric <" + lyric + ">: " +
                            p->errorString());
          });

  if (_process_count < 10) {
    _process_count++;
    p->start();
    _runningProc.push_back(p);
  } else {
    _scheduledProc.push_back(p);
  }
}

/// Starts `sndfile-resample` to convert @p infile to sample rate @p fs,
/// writing the result to @p outfile.
///
/// The call returns immediately; processFinished() is invoked once the
/// external process has finished.
void VoSamp::convertSamplerate(QString infile, QString outfile, int fs) {
  QStringList args;
  args << "-to" << QString::number(fs) << "-c" << "0" << infile << outfile;

  QProcess* p = new QProcess();
  p->setProgram("sndfile-resample");
  p->setArguments(args);

  connect(p, QOverload<int, QProcess::ExitStatus>::of(&QProcess::finished),
          this, &VoSamp::processFinished);
  // This process is not tracked in _runningProc, so nobody else deletes it.
  // deleteLater() defers destruction until control is back in the event loop.
  connect(p, QOverload<int, QProcess::ExitStatus>::of(&QProcess::finished),
          p, &QObject::deleteLater);

  // A process that cannot be launched never emits finished(): report the
  // problem and release the object here.
  connect(p, &QProcess::errorOccurred, this,
          [this, p](QProcess::ProcessError error) {
            _ctrl->logError("sndfile-resample failed: " + p->errorString());
            if (error == QProcess::FailedToStart) p->deleteLater();
          });

  p->start();
}

void VoSamp::processFinished() {
  if (_scheduledProc.size() > 0) {
    QProcess* p = _scheduledProc[0];
    _scheduledProc.removeFirst();
    _process_count++;
    p->start();
    _runningProc.push_back(p);

  } else {
    QMutableListIterator<QProcess*> it(_runningProc);
    while (it.hasNext()) {
      QProcess* p = it.next();
      if (p->state() == QProcess::NotRunning) {
        delete p;
        it.remove();
      }
    }
  }
  if (_runningProc.size() == 0 && _scheduledProc.size() == 0) {
    DEVLOG_DEBUG("no processes");
    if (_state == STATE_RESUME_ESPEAK) {
      resumeESPEAK();
    }
    if (_state == STATE_CONVERT_SAMPLERATE) {
      startPlaybackSamplerate();
    }
  }
}

void VoSamp::startPlaybackSamplerate() {
  QString fn2 = "/tmp/qtau_synth.wav";
  _ctrl->startOfflinePlayback(fn2);
  _state = STATE_RUNNING;
}

void VoSamp::resumeESPEAK() {
  _timeline->fix();
  _ctrack->fix();

  MBRSynth* mbr_synth = new MBRSynth(_ctrack);
  config.type=synthType::ESPEAK;
  config.mbr_period=0;
  config.frame_period=0;
  config.fft_size=0;
  mbr_synth->setConfig(&config);

  _timeline->outputPho(mbr_synth);//crash here
  if (REALTIME == false)  // not realtime
  {
    QString fn2 = "/tmp/qtau_synth.wav";
    QString fn = "/tmp/qtau_synth_espeak.wav";
    _ctrl->logSuccess("running MBRSynth in eSpeak mode");
    runSynth(mbr_synth, fn, mbr_synth->samplerate());
    _ctrl->logSuccess("start playback");

    if (mbr_synth->samplerate() == _jack_samplerate)
      _ctrl->startOfflinePlayback(fn);
    else {
      _state = STATE_CONVERT_SAMPLERATE;
      convertSamplerate(fn, fn2, _jack_samplerate);
      //_ctrl->startOfflinePlayback(fn2);
    }
    delete mbr_synth;
  }
}

/// Synthesizes @p score with the voicebank at _voicePath and starts playback.
///
/// The voicebank type is detected from the files in _voicePath:
///  - `eSpeak.json` selects the eSpeak path. Missing .pho files are generated
///    by external espeak-sg processes; in that case the function returns
///    early and processFinished() continues with resumeESPEAK().
///  - `oto.ini` together with `oto.json` selects the UTAU path, which is
///    rendered synchronously.
///
/// @return true when rendering was started or completed, false when the
///         voicebank is not recognised or nothing was rendered.
bool VoSamp::synthesize(IScore* score) {
  _state = STATE_RUNNING;
  DEVLOG_DEBUG("VoSamp::synthesize voicepath=<" + _voicePath + ">");

  // NOTE(review): the timeline and control track of a previous call are not
  // released here; ownership is not visible in this file - confirm.
  _timeline = nullptr;
  _ctrack = new ControlTrack();

  QString otoini = _voicePath + "/oto.ini";
  QString espeakjson = _voicePath + "/eSpeak.json";

  bool utau = false;
  bool espeak = false;

  if (fileExists(espeakjson)) {
    _timeline = sinsy::newTimelineESPEAK();

    QFile file;
    file.setFileName(espeakjson);
    file.open(QIODevice::ReadOnly | QIODevice::Text);
    auto val = file.readAll();
    file.close();
    QJsonDocument j = QJsonDocument::fromJson(val);
    _espeakVoice = j["voice"].toString();
    // Fall back to the German voice when the file names none.
    if (_espeakVoice.length() == 0) _espeakVoice = "de";

    espeak = true;
  } else if (fileExists(otoini) && fileExists(_voicePath + "/oto.json")) {
    _timeline = sinsy::newTimelineUTAU(otoini.toStdString(), "UTF8");
    // NOTE(review): synthesis continues even when the timeline is not valid.
    if (_timeline->isValid()) {
      _ctrl->logSuccess("Found valid UTAU voicebank at " + _voicePath);
    }
    utau = true;
  } else {
    QString msg = "Voicebank at " + _voicePath + " is not a valid voicebank";
    _ctrl->logError(msg);
    return false;
  }

  int noteCount = score->getNoteCount();
  for (int i = 0; i < noteCount; i++) {
    auto note = score->getNote(i);
    _timeline->addLyric(note.lyric.toStdString(), note.start, note.lenght,
                        note.pitch);
    // deprecate addNote -> simplify note handler

    if (note.pit.size() == 0) {
      // No pitch curve: hold the note frequency at three points of the note.
      float f0 = static_cast<float>(frequencyFromNote(note.pitch));
      if (i == 0) _ctrack->addPoint(0, f0);
      _ctrack->addPoint(note.start + 0.2f * note.lenght, f0);
      _ctrack->addPoint(note.start + 0.5f * note.lenght, f0);
      _ctrack->addPoint(note.start + 0.8f * note.lenght, f0);
    } else {
      // Explicit pitch curve: y is an offset added to the note pitch.
      for (int k = 0; k < note.pit.size(); k++) {
        auto pointF0 = frequencyFromNote(note.pitch + note.pit[k].y);
        //pointF0 += note.pit[k].y_add;
        _ctrack->addPoint(note.pit[k].x, pointF0);
      }
    }

    // Every pair of neighbouring dynamics points becomes one segment.
    if (note.dyn.size() >= 2) {
      for (int k = 0; k < note.dyn.size() - 1; k++) {
        DynSegment d;
        float p0 = note.dyn[k].x;
        float p1 = note.dyn[k + 1].x;
        //d.start=note.start+note.lenght*p0;
        //d.end=note.start+note.lenght*p1;
        d.start = p0;
        d.end = p1;
        d.dyn0 = note.dyn[k].y;
        d.dyn1 = note.dyn[k + 1].y;
        _ctrack->addDynamicsSegment(d);
        DEVLOG_DEBUG("setDYN"<<p0<<p1<<d.dyn0<<d.dyn1);
      }
    }
  }

  if (espeak) {
    _ctrl->logSuccess("generate units for espeak");
    // generate units
    int count = _timeline->getEventCount();
    // Files already requested in this call: a lyric/pitch pair that occurs
    // several times must not launch several processes writing the same file.
    QStringList requested;

    for (int i = 0; i < count; i++) {
      QString lyric = QString::fromStdString(_timeline->getLyric(i));
      int pitch = _timeline->getPitch(i, false);
      QString pitch2 = QString::number(pitch);
      QString voice = _espeakVoice;
      //+ optional voice variant
      QString phofile = _voicePath + "/_u" + pitch2 + "_v" + voice + "_" + lyric + ".pho";
      if (!fileExists(phofile)) {
        if (!requested.contains(phofile)) {
          requested << phofile;
          runESPEAK(phofile, lyric, pitch, voice);
        }
        _state = STATE_RESUME_ESPEAK;
      }
      // set pho file path
      _timeline->setParam(i, "FileName", phofile.toStdString());
    }
    // Units are still being generated: processFinished() resumes later.
    if (_state == STATE_RESUME_ESPEAK) return true;

    resumeESPEAK();
    // Same result as the UTAU branch: success once offline rendering ran.
    return REALTIME == false;
  }

  if (utau) {
    _timeline->fix();
    _ctrack->fix();
    // works but needs refactoring
    // UTAU synth: reads json
    MBRSynth* utau_synth = new MBRSynth(_ctrack);
    utau_synth->setBasedir(_voicePath.toStdString());
    config.type = synthType::UTAU;
    config.mbr_period = 0;
    config.frame_period = 0;
    config.fft_size = 0;
    utau_synth->setConfig(&config);
    _timeline->outputPho(utau_synth);
    if (REALTIME == false)  // not realtime
    {
      QString fn = "/tmp/qtau_synth.wav";

      _ctrl->logSuccess("running MBRSynth in UTAU mode");
      runSynth(utau_synth, fn, utau_synth->samplerate());
      _ctrl->logSuccess("start playback");
      _ctrl->startOfflinePlayback(fn);
      delete utau_synth;
      return true;
    }
  }

  return false;
}

//
/// Pulls @p size samples from the current sampler into @p data.
/// Returns 0 when the sampler reports that it has no more data, otherwise
/// @p size (also when no sampler is set, in which case @p data is untouched).
int VoSamp::readData(float* data, int size) {
  if (!_samp) return size;

  const bool gotSamples = _samp->readData(data, size, _samp_fill);
  return gotSamples ? size : 0;
}

/// phoneme transformation, todo write plugin runner, define phrase api
/// TODO implement later

/// Placeholder: returns @p txt unchanged.
/// Intended to be backed by an external NLP step (python plugin, sync).
QString VoSamp::getTranscription(QString txt) {
  QString transcription = txt;
  return transcription;
}

/// Not implemented yet (TODO lyrizer): only logs the number of entries in
/// @p list, leaves the list untouched and returns false.
bool VoSamp::doPhonemeTransformation(QStringList& list) {
  DEVLOG_DEBUG(STR(list.count()));

  const bool transformed = false;
  return transformed;
}

/// voice list (fix this there is only one)

/// Stores the directory of the voicebank used by synthesize().
/// Always returns true; the path is not validated here.
bool VoSamp::setVoicePath(QString voicePath) {
  //TODO: read oto.json
  this->_voicePath = voicePath;
  return true;
}

/// logging (helper) (refactor this)
/// Forwards a debug message to the controller.
void VoSamp::on_logDebug(QString debug) {
  _ctrl->logDebug(debug);
}

void VoSamp::on_logError(QString error) { _ctrl->logError(error); }

/// Forwards a success message to the controller.
void VoSamp::on_logSuccess(QString success) {
  _ctrl->logSuccess(success);
}

//