vosamp.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. /*
  2. This file is part of QTau
  3. Copyright (C) 2013-2020 Tobias "Tomoko" Platen <tplaten@posteo.de>
  4. Copyright (C) 2013 digited <https://github.com/digited>
  5. Copyright (C) 2010-2013 HAL@ShurabaP <https://github.com/haruneko>
  6. QTau is free software: you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation, either version 3 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. SPDX-License-Identifier: GPL-3.0+
  17. */
  18. #include "vosamp.h"
  19. #include "../editor/ustjkeys.h"
  20. #include <QDebug>
  21. #include <QDir>
  22. #include <QDirIterator>
  23. #include <QFile>
  24. #include <QFileInfo>
  25. #include <QJsonDocument>
  26. #include <QStringList>
  27. #include <QTextStream>
  28. #include <assert.h>
  29. #include <math.h>
  30. #include <sekai/MBRSynth.h>
  31. #include <sekai/ControlTrack.h>
  32. #include <sekai/midi.h>
  33. #include <sndfile.h>
  34. #include <stdio.h>
  35. #include <stdlib.h>
  36. #include <unistd.h>
  37. #include <QProcess>
//TODO
// File-scope synthesizer settings shared by this translation unit.
// resumeESPEAK() and synthesize() overwrite its fields (type, mbr_period,
// frame_period, fft_size) and pass its address to MBRSynth::setConfig().
static MBRConfig config;
  40. #define REALTIME false
  41. #define STATE_IDLE 0
  42. #define STATE_RUNNING 1
  43. #define STATE_RESUME_ESPEAK 2
  44. #define STATE_CONVERT_SAMPLERATE 3
  45. //FIXME: sometimes crashes when espeak is used
  46. bool fileExists(QString path) {
  47. QFileInfo check_file(path);
  48. // check if file exists and if yes: Is it really a file and no directory?
  49. return check_file.exists() && check_file.isFile();
  50. }
  51. void runSynth(VoiceSampler* synth, QString wavFileName, int samplerate) {
  52. SF_INFO info;
  53. memset(&info,0,sizeof(info)); //XXX: create memzero macro
  54. info.samplerate = samplerate;
  55. info.channels = 1;
  56. info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16;
  57. SNDFILE* sf = sf_open(wavFileName.toUtf8().data(), SFM_WRITE, &info);
  58. while (1) {
  59. const int size = 1024;
  60. int fill = size * 4;
  61. float buffer_out[size];
  62. if (synth->readData(buffer_out, size, fill) == false) break;
  63. sf_write_float(sf, buffer_out, size);
  64. }
  65. }
  66. /// manifest
  67. QString VoSamp::name() { return "VoSamp"; }
  68. QString VoSamp::description() { return "A real time singing synthesizer"; }
  69. QString VoSamp::version() { return "19.10"; }
  70. /// setup
  71. void VoSamp::setup(IController* ctrl) {
  72. this->_ctrl = ctrl;
  73. this->_jack_samplerate = ctrl->sampleRate();
  74. #if 0
  75. if(_jack_samplerate!=_samplerate)
  76. {
  77. // may depend on current voicebank
  78. // create temp files, resample offline, warn mismatch
  79. }
  80. #endif
  81. // thread queue signaling
  82. connect(this, &VoSamp::logDebug, this, &VoSamp::on_logDebug);
  83. connect(this, &VoSamp::logError, this, &VoSamp::on_logError);
  84. connect(this, &VoSamp::logSuccess, this, &VoSamp::on_logSuccess);
  85. }
  86. bool VoSamp::setCacheDir(QString cacheDir) {
  87. /// ONLY some backends use this
  88. _cacheDir = cacheDir;
  89. return true;
  90. }
  91. bool VoSamp::synthIsRealtime() { return REALTIME; }
  92. void VoSamp::runESPEAK(QString outfile, QString lyric, int noteNum, QString voice) {
  93. QProcess* p = new QProcess();
  94. p->setProgram("espeak-sg");
  95. QStringList args;
  96. int rate=100;
  97. //usage: espeak-sg voice f0 lyric rate filename
  98. args << voice;
  99. int f0 = static_cast<int>(frequencyFromNote(noteNum));
  100. args << QVariant(f0).toString();
  101. args << lyric;
  102. args << QVariant(rate).toString();
  103. args << outfile;
  104. p->setArguments(args);
  105. connect(p, SIGNAL(finished(int)), this, SLOT(processFinished()));
  106. if (_process_count < 10) {
  107. _process_count++;
  108. p->start();
  109. _runningProc.push_back(p);
  110. } else {
  111. _scheduledProc.push_back(p);
  112. }
  113. }
  114. void VoSamp::convertSamplerate(QString infile, QString outfile, int fs) {
  115. QProcess* p = new QProcess();
  116. p->setProgram("sndfile-resample");
  117. QStringList args;
  118. args << "-to";
  119. args << QVariant(fs).toString();
  120. args << "-c";
  121. args << "0";
  122. args << infile;
  123. args << outfile;
  124. p->setArguments(args);
  125. connect(p, SIGNAL(finished(int)), this, SLOT(processFinished()));
  126. p->start();
  127. }
  128. void VoSamp::processFinished() {
  129. if (_scheduledProc.size() > 0) {
  130. QProcess* p = _scheduledProc[0];
  131. _scheduledProc.removeFirst();
  132. _process_count++;
  133. p->start();
  134. _runningProc.push_back(p);
  135. } else {
  136. QMutableListIterator<QProcess*> it(_runningProc);
  137. while (it.hasNext()) {
  138. QProcess* p = it.next();
  139. if (p->state() == QProcess::NotRunning) {
  140. delete p;
  141. it.remove();
  142. }
  143. }
  144. }
  145. if (_runningProc.size() == 0 && _scheduledProc.size() == 0) {
  146. DEVLOG_DEBUG("no processes");
  147. if (_state == STATE_RESUME_ESPEAK) {
  148. resumeESPEAK();
  149. }
  150. if (_state == STATE_CONVERT_SAMPLERATE) {
  151. startPlaybackSamplerate();
  152. }
  153. }
  154. }
  155. void VoSamp::startPlaybackSamplerate() {
  156. QString fn2 = "/tmp/qtau_synth.wav";
  157. _ctrl->startOfflinePlayback(fn2);
  158. _state = STATE_RUNNING;
  159. }
  160. void VoSamp::resumeESPEAK() {
  161. _timeline->fix();
  162. _ctrack->fix();
  163. MBRSynth* mbr_synth = new MBRSynth(_ctrack);
  164. config.type=synthType::ESPEAK;
  165. config.mbr_period=0;
  166. config.frame_period=0;
  167. config.fft_size=0;
  168. mbr_synth->setConfig(&config);
  169. _timeline->outputPho(mbr_synth);//crash here
  170. if (REALTIME == false) // not realtime
  171. {
  172. QString fn2 = "/tmp/qtau_synth.wav";
  173. QString fn = "/tmp/qtau_synth_espeak.wav";
  174. _ctrl->logSuccess("running MBRSynth in eSpeak mode");
  175. runSynth(mbr_synth, fn, mbr_synth->samplerate());
  176. _ctrl->logSuccess("start playback");
  177. if (mbr_synth->samplerate() == _jack_samplerate)
  178. _ctrl->startOfflinePlayback(fn);
  179. else {
  180. _state = STATE_CONVERT_SAMPLERATE;
  181. convertSamplerate(fn, fn2, _jack_samplerate);
  182. //_ctrl->startOfflinePlayback(fn2);
  183. }
  184. delete mbr_synth;
  185. }
  186. }
  187. bool VoSamp::synthesize(IScore* score) {
  188. _state = STATE_RUNNING;
  189. DEVLOG_DEBUG("VoSamp::synthesize voicepath=<" + _voicePath + ">");
  190. _timeline = nullptr;
  191. _ctrack = new ControlTrack();
  192. QString otoini = _voicePath + "/oto.ini";
  193. QString espeakjson = _voicePath + "/eSpeak.json";
  194. bool utau = false;
  195. bool espeak = false;
  196. if (fileExists(espeakjson)) {
  197. _timeline = sinsy::newTimelineESPEAK();
  198. QFile file;
  199. file.setFileName(espeakjson);
  200. file.open(QIODevice::ReadOnly | QIODevice::Text);
  201. auto val = file.readAll();
  202. file.close();
  203. QJsonDocument j = QJsonDocument::fromJson(val);
  204. _espeakVoice = j["voice"].toString();
  205. if(_espeakVoice.length()==0) _espeakVoice="de";
  206. espeak = true;
  207. } else if (fileExists(otoini) && fileExists(_voicePath + "/oto.json")) {
  208. _timeline = sinsy::newTimelineUTAU(otoini.toStdString(),"UTF8");
  209. if (_timeline->isValid()) {
  210. _ctrl->logSuccess("Found valid UTAU voicebank at " + _voicePath);
  211. }
  212. utau = true;
  213. } else {
  214. QString msg = "Voicebank at " + _voicePath + " is not a valid voicebank";
  215. _ctrl->logError(msg);
  216. return false;
  217. }
  218. int noteCount = score->getNoteCount();
  219. for (int i = 0; i < noteCount; i++) {
  220. auto note = score->getNote(i);
  221. _timeline->addLyric(note.lyric.toStdString(), note.start, note.lenght,
  222. note.pitch);
  223. // deprecate addNote -> simplify not hander
  224. float f0 = static_cast<float>(frequencyFromNote(note.pitch));
  225. if(note.pit.size()==0)
  226. {
  227. if(i==0) _ctrack->addPoint(0,f0);
  228. _ctrack->addPoint(note.start + 0.2f * note.lenght,f0);
  229. _ctrack->addPoint(note.start + 0.5f * note.lenght,f0);
  230. _ctrack->addPoint(note.start + 0.8f * note.lenght,f0);
  231. }
  232. else {
  233. for(int i=0;i<note.pit.size();i++)
  234. {
  235. auto f0 = frequencyFromNote(note.pitch+note.pit[i].y);
  236. //f0 += note.pit[i].y_add;
  237. _ctrack->addPoint(note.pit[i].x,f0);
  238. }
  239. }
  240. if(note.dyn.size()>=2)
  241. for(int i=0;i<note.dyn.size()-1;i++)
  242. {
  243. DynSegment d;
  244. float p0 = note.dyn[i].x;
  245. float p1 = note.dyn[i+1].x;
  246. //d.start=note.start+note.lenght*p0;
  247. //d.end=note.start+note.lenght*p1;
  248. d.start = p0;
  249. d.end = p1;
  250. d.dyn0=note.dyn[i].y;
  251. d.dyn1=note.dyn[i+1].y;
  252. _ctrack->addDynamicsSegment(d);
  253. DEVLOG_DEBUG("setDYN"<<p0<<p1<<d.dyn0<<d.dyn1);
  254. }
  255. }
  256. if (espeak) {
  257. _ctrl->logSuccess("generate units for espeak");
  258. // generate units
  259. int count = _timeline->getEventCount();
  260. //bool update = false;
  261. for (int i = 0; i < count; i++) {
  262. QString lyric = QString::fromStdString(_timeline->getLyric(i));
  263. int pitch = _timeline->getPitch(i, false);
  264. QString pitch2 = QVariant(pitch).toString();
  265. QString voice = _espeakVoice;
  266. //+ optional voice variant
  267. QString phofile = _voicePath + "/_u" + pitch2 + "_v"+ voice + "_" + lyric + ".pho";
  268. if (!fileExists(phofile)) {
  269. runESPEAK(phofile, lyric, pitch, voice);
  270. _state = STATE_RESUME_ESPEAK;
  271. }
  272. // set pho file path
  273. _timeline->setParam(i, "FileName", phofile.toStdString());
  274. }
  275. if (_state == STATE_RESUME_ESPEAK) return true;
  276. }
  277. if (espeak) resumeESPEAK();
  278. if (utau) {
  279. _timeline->fix();
  280. _ctrack->fix();
  281. // works but needs refactoring
  282. // UTAU synth: reads json
  283. MBRSynth* utau_synth = new MBRSynth(_ctrack);
  284. utau_synth->setBasedir(_voicePath.toStdString());
  285. config.type=synthType::UTAU;
  286. config.mbr_period=0;
  287. config.frame_period=0;
  288. config.fft_size=0;
  289. utau_synth->setConfig(&config);
  290. _timeline->outputPho(utau_synth);
  291. if (REALTIME == false) // not realtime
  292. {
  293. QString fn = "/tmp/qtau_synth.wav";
  294. _ctrl->logSuccess("running MBRSynth in UTAU mode");
  295. runSynth(utau_synth, fn, utau_synth->samplerate());
  296. _ctrl->logSuccess("start playback");
  297. _ctrl->startOfflinePlayback(fn);
  298. delete utau_synth;
  299. return true;
  300. }
  301. }
  302. return false;
  303. }
  304. //
  305. int VoSamp::readData(float* data, int size) {
  306. if (_samp) {
  307. bool has_data = _samp->readData(data, size, _samp_fill);
  308. if (has_data == false) return 0;
  309. }
  310. return size;
  311. }
  312. /// phoneme transformation, todo write plugin runner, define phrase api
  313. /// TODO implement later
  314. QString VoSamp::getTranscription(QString txt) {
  315. return txt; // extenal NLP (python plugin, sync)
  316. }
  317. bool VoSamp::doPhonemeTransformation(QStringList& list) {
  318. // TODO lyrizer
  319. DEVLOG_DEBUG(STR(list.count()));
  320. return false;
  321. }
  322. /// voice list (fix this there is only one)
  323. bool VoSamp::setVoicePath(QString voicePath) {
  324. _voicePath = voicePath;
  325. //TODO: read oto.json
  326. return true;
  327. }
  328. /// logging (helper) (refactor this)
  329. void VoSamp::on_logDebug(QString debug) { _ctrl->logDebug(debug); }
  330. void VoSamp::on_logError(QString error) { _ctrl->logError(error); }
  331. void VoSamp::on_logSuccess(QString success) { _ctrl->logSuccess(success); }
  332. //