/*
 * Synthesizer.cpp
 * Copyright © 2010-2012 HAL, 2012 kbinani
 *
 * This file is part of vConnect-STAND.
 *
 * vConnect-STAND is free software; you can redistribute it and/or
 * modify it under the terms of the GPL License.
 *
 * vConnect-STAND is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */
#include <time.h>
#include <math.h>
#include <tuple>
#include <vorbis/vorbisfile.h>
#include "stand.h"
#include "Configuration.h"
#include "Synthesizer.h"
#include "vConnectPhoneme.h"
#include "vConnectUtility.h"
#include "vsq/EventList.h"
#include "Thread.h"
#include "utau/UtauDBManager.h"
#include "vsq/CurveTypeEnum.h"
#include "world/world.h"
#include "WaveBuffer/WaveBuffer.h"

#define TRANS_MAX 4096
double temporary1[TRANS_MAX];
double temporary2[TRANS_MAX];
double temporary3[TRANS_MAX];

double Synthesizer::noiseWave[NOISE_LEN];
double Synthesizer::mNoteFrequency[NOTE_NUM];
double Synthesizer::mVibrato[VIB_NUM];

using namespace vconnect;

ThreadWorkerReturnType ThreadWorkerDeclspec synthesizeFromList( void *arg );

struct vConnectData {
    vConnectPhoneme *phoneme;
    int index;
    double morphRatio;
};

struct vConnectFrame {
    list<vConnectData *> dataList;
};

struct vConnectArg {
    double *f0;
    double *wave;
    double *dynamics;
    int beginFrame;
    int endFrame;
    int frameOffset;
    int waveLength;
    int fftLength;
    vConnectFrame *frames;
    vector<vConnectPhoneme *> *phonemes;
    EventList *eventList;
    vector<vector<FrameBP> > *controlCurves;
};
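// Small helper that pairs an OggVorbis_File handle with a one-frame decode cache:
// buf holds the residual samples read last, and prevPos/pos track which frame they
// belong to, so consecutive requests for the same frame can skip re-decoding.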
class vorbisFile {
public:
    explicit vorbisFile(int fftLength)
    {
        buf = NULL;
        if(fftLength > 0)
        {
            buf = new float[fftLength];
        }
        pos = 0;
        prevPos = -1;
    }
    ~vorbisFile()
    {
        delete[] buf;   // buf is allocated with new[], so release it with delete[].
    }
    OggVorbis_File ovf;
    int prevPos;
    int pos;
    float *buf;
};
Synthesizer::Synthesizer( RuntimeOption option )
    : Task( option )
{
    double a4frequency = Sequence::getA4Frequency();
    int a4note = Sequence::getA4NoteNumber();
    for( int i = 0; i < NOTE_NUM; i++ ){
        mNoteFrequency[i] = a4frequency * pow( 2.0, (double)(i - a4note) / 12.0 );
    }
    mVibrato[0] = 0.0;
    for( int i = 1; i < VIB_NUM; i++ ){
        double period = exp( 5.24 - 1.07e-2 * i ) * 2.0 / 1000.0;
        mVibrato[i] = 2.0 * ST_PI / period;
    }
    for(int i = 0; i < NOISE_LEN; i++){
        noiseWave[i] = randn();
    }
    time_t timer;
    time( &timer );
    srand( (unsigned int)timer );
    mFluctTheta = 2.0 * (double)rand() / (double)RAND_MAX * ST_PI;
}

Synthesizer::~Synthesizer()
{
    for( unsigned int i = 0; i < mManagerList.size(); i++ )
    {
        SAFE_DELETE( mManagerList[i] );
    }
}
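// Returns a slowly varying multiplicative pitch fluctuation built from three
// unsynchronized sinusoids, used to keep sustained notes from sounding static.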
double Synthesizer::getPitchFluctuation( double second )
{
    double result = 1.0 + ( sin( 12.7 * ST_PI * second ) + sin( 7.1 * ST_PI * second ) + sin( 4.7 * ST_PI * second ) / 3.0 ) / 300.0;
    return result;
}

void Synthesizer::emptyPath( double secOffset, string output )
{
    WaveBuffer wave;
    wave.setOffset( secOffset );
    wave.writeWaveFile( output );
    return;
}
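// For every output frame, build the list of phonemes that contribute to it together
// with their morphing ratios.  The brightness (BRI) curve selects which phoneme
// variants are mixed and with what weight, note velocity stretches the fixed
// (consonant) region, and overlapping notes split the remaining ratio between the
// outgoing and the incoming note.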
void calculateFrameData(vConnectFrame *dst, int frameLength, vector<vConnectPhoneme *> &phonemes, Sequence &vsq, vector<corpusManager *> &managers, vector<FrameBP> &briCurve, int beginFrame)
{
    vector<int> briArray;
    Event *itemPrev = NULL;

    // Expand the brightness curve into one value per frame.
    int briIndex = 0;
    briArray.resize(frameLength);
    for(int i = 0; i < frameLength; i++)
    {
        while( i + beginFrame > briCurve[briIndex].frameTime )
        {
            briIndex++;
        }
        briArray[i] = briCurve[briIndex].value;
    }

    double framePeriod = Configuration::getMilliSecondsPerFrame();

    // For each note, look up the matching phonemes and add them to the synthesis list.
    for(int i = 0; i < vsq.events.eventList.size(); i++) {
        Event *itemThis = vsq.events.eventList[i];
        Event *itemNext = (itemThis->isContinuousBack) ? vsq.events.eventList[i+1] : NULL;
        string lyric = itemThis->lyricHandle.getLyric();
        int const note_number = itemThis->note;
        list<corpusManager::phoneme *> phonemeList;
        corpusManager::phoneme * p = nullptr;
        managers[itemThis->singerIndex]->getPhoneme(lyric, note_number, phonemeList);
        double vel = pow(2.0, (double)(64 - itemThis->velocity) / 64);

        // If the next note overlaps this one, work out where it starts.
        int nextBeginFrame = 0;
        if(itemNext) {
            if(itemPrev) {
                nextBeginFrame = max(itemNext->beginFrame, itemPrev->endFrame);
            } else {
                nextBeginFrame = itemNext->beginFrame;
            }
        }

        // Add any phoneme pieces that are not yet registered to the phoneme list.
        for(list<corpusManager::phoneme *>::iterator it = phonemeList.begin(); it != phonemeList.end(); it++)
        {
            p = (*it);
            vConnectPhoneme* phoneme = p->p;
            bool newPhoneme = true;
            for(int j = 0; j < phonemes.size(); j++)
            {
                if(phonemes[j] == phoneme)
                {
                    newPhoneme = false;
                    break;
                }
            }
            if(newPhoneme)
            {
                phonemes.push_back(phoneme);
            }
        }
        if (!p) {
            continue;
        }

        // Write the current phonemes into every frame where the note is active.
        for(int j = itemThis->beginFrame, index = itemThis->beginFrame - beginFrame; j < itemThis->endFrame && index < frameLength; j++, index++)
        {
            if(index < 0){ continue; }
            int frameIndex = (int)((j - itemThis->beginFrame) * vel);
            int briVal = briArray[index]; // Current BRI value.
            int minBri = -1, maxBri = 129;
            frameIndex = (int)max( 2.0, min( (double)frameIndex, itemThis->utauSetting.msFixedLength / framePeriod ) );
            // Use the following if the same frame should not be reused.
            frameIndex = max( 2, frameIndex );
            if( frameIndex > itemThis->utauSetting.msFixedLength / framePeriod ){
                int tmpDiff = frameIndex - (int)(itemThis->utauSetting.msFixedLength / framePeriod);
                int tmpRoom = (p->p->getTimeLength() - (int)(itemThis->utauSetting.msFixedLength / framePeriod)) * 2 / 3;
                frameIndex = (int)(itemThis->utauSetting.msFixedLength / framePeriod);
                if( tmpRoom > 0 ){ // Guard against division by zero when no room is left.
                    if( tmpDiff / tmpRoom % 2 == 0 ){
                        frameIndex += tmpDiff % tmpRoom;
                    }else{
                        frameIndex += tmpRoom - tmpDiff % tmpRoom;
                    }
                }
            }

            // Find the brightness range that brackets the current BRI value.
            for(list<corpusManager::phoneme *>::iterator it = phonemeList.begin(); it != phonemeList.end(); it++)
            {
                p = (*it);
                if(p->brightness < briVal)
                {
                    minBri = max(p->brightness, minBri);
                }
                else
                {
                    maxBri = min(p->brightness, maxBri);
                }
            }
            if(minBri == -1)
            {
                briVal = maxBri;
            }
            if(maxBri == 129)
            {
                briVal = minBri;
            }

            // First work out how much morphing room is still left in this frame.
            double morphRatio = 1.0;
            for(list<vConnectData *>::iterator itr = dst[index].dataList.begin(); itr != dst[index].dataList.end(); itr++)
            {
                morphRatio -= (*itr)->morphRatio;
            }
            if(morphRatio <= 0.0)
            {
                continue;
            }

            // Register each applicable phoneme into this frame.
            for(list<corpusManager::phoneme *>::iterator it = phonemeList.begin(); it != phonemeList.end(); it++)
            {
                p = (*it);
                // Brightness is out of range.
                if(p->brightness < minBri || p->brightness > maxBri)
                {
                    continue;
                }
                vConnectPhoneme *phoneme = p->p;
                vConnectData *data = new vConnectData;
                data->phoneme = phoneme;
                double baseBriRatio = 1.0 - (double)abs(briVal - p->brightness) / (double)(maxBri - minBri);
                baseBriRatio = max(0.0, min(1.0, baseBriRatio));
                if(baseBriRatio == 0.0)
                {
                    delete data;
                    continue;
                }
                if(itemThis->isContinuousBack && nextBeginFrame < j)
                {
                    data->morphRatio = (double)(itemThis->endFrame - j) / (double)(itemThis->endFrame - nextBeginFrame) * morphRatio;
                }
                else
                {
                    data->morphRatio = morphRatio;
                }
                double tmpIndex = (phoneme->getFrameTime(frameIndex) * (1.0 - baseBriRatio) + frameIndex * framePeriod * baseBriRatio / 1000.0) / framePeriod * 1000.0;
                data->index = (int)tmpIndex;
                if((*it)->children)
                {
                    data->index = (int)(((*it)->children->p->getBaseFrameTime(data->index) * (1.0 - baseBriRatio) + tmpIndex * framePeriod * baseBriRatio / 1000.0) / framePeriod * 1000.0);
                }
                data->morphRatio *= baseBriRatio;
                dst[index].dataList.push_back(data);
            }
        }
        itemPrev = itemThis;
    }
}
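// Entry point of the synthesizer task: read the VSQ, analyze the required voice-bank
// entries, expand control curves and UTAU timing parameters, compute the F0 and
// dynamics tracks, build the per-frame phoneme lists, run synthesizeFromList (in two
// threads when STND_MULTI_THREAD is enabled), then clip and write the waveform.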
void Synthesizer::run()
{
    string input = this->option.getInputPath();
    string output = this->option.getOutputPath();
#ifdef _DEBUG
    cout << "vConnect::synthesize; calling vsq.readVsqFile...";
#endif
    // Bail out if reading the sequence fails.
    if( false == mVsq.read( input, this->option ) ){
#ifdef _DEBUG
        cout << "vConnect::synthesize; calling vsq.readVsqFile...done, failed";
#endif
        return;
    }
#ifdef _DEBUG
    cout << "vConnect::synthesize; calling vsq.readVsqFile...done, succeeded";
#endif
    // If the sequence is empty, write an empty wave file and return.
    if( mVsq.events.eventList.empty() && UtauDBManager::size() == 0 ){
        emptyPath( mVsq.getEndSec(), output );
        return;
    }
    long beginFrame, frameLength, waveLength;
    int fftLength, aperiodicityLength;
    double *f0, *dynamics;
    double *wave;
    for( int i = 0; i < UtauDBManager::size(); i++ )
    {
        corpusManager *p = new corpusManager;
        p->setUtauDB( UtauDBManager::get( i ), this->option );
        vector<tuple<string, int>> analyze_list;
        for( int j = 0; j < mVsq.events.eventList.size(); j++) {
            auto item = mVsq.events.eventList[j];
            if (item->singerIndex == i) {
                analyze_list.push_back(make_tuple(item->lyricHandle.getLyric(), item->note));
            }
        }
        p->analyze( analyze_list );
        mManagerList.push_back( p );
    }
    // Preparation 1: apply parameters such as pre-utterance and convert control curves to frame times.
    this->calculateVsqInfo();
#if defined( _DEBUG )
    cout << "vConnect::synthesize; calling mVsq.dumpMapIDs..." << endl;
    mVsq.dumpMapIDs();
    cout << "vConnect::synthesize; calling mVsq.dumpMapIDs...done" << endl;
#endif
    int sampleRate = Configuration::getDefaultSampleRate();
    aperiodicityLength = fftLength = getFFTLengthForStar( sampleRate );
    // Preparation 2: initialize the local variables needed for synthesis.
    beginFrame = mVsq.events.eventList[0]->beginFrame;
    frameLength = mEndFrame - beginFrame;
    double framePeriod = Configuration::getMilliSecondsPerFrame();
    waveLength = (long int)(frameLength * framePeriod * sampleRate / 1000);
    wave = new double[waveLength];
    memset(wave, 0, sizeof(double) * waveLength);
    f0 = new double[frameLength];
    dynamics = new double[frameLength];
    // Preparation 3: compute the amplitude and fundamental frequency for each frame.
    this->calculateF0( f0, dynamics );
    // Preparation 4: organize the per-frame information needed at synthesis time.
    vConnectFrame *frames = new vConnectFrame[frameLength];
    vector<vConnectPhoneme *> phonemes;
    calculateFrameData(frames, frameLength, phonemes, mVsq, mManagerList, mControlCurves[CurveTypeEnum::BRIGHTNESS], beginFrame);
    // The actual synthesis.
    vConnectArg arg1, arg2;
    arg1.frames = frames;
    arg1.phonemes = &phonemes;
    arg1.beginFrame = 0;
    arg1.endFrame = frameLength;
    arg1.f0 = f0;
    arg1.dynamics = dynamics;
    arg1.fftLength = fftLength;
    arg1.wave = wave;
    arg1.waveLength = waveLength;
    arg1.eventList = &(mVsq.events);
    arg1.controlCurves = &mControlCurves;
    arg1.frameOffset = beginFrame;
    printf("begin synthesis..\n");
    clock_t cl = clock();
#ifdef STND_MULTI_THREAD
    Thread *hThread[2];
#ifdef _DEBUG
    cout << "vConnect::synthesize; STND_MULTI_THREAD" << endl;
#endif
    hMutex = new Mutex();
    hFFTWMutex = new Mutex();
#ifdef _DEBUG
    cout << "vConnect::synthesize; mutex created: hFFTWMutex" << endl;
#endif
    arg2 = arg1;
    // Count the pitch-synchronous synthesis frames, then split the work roughly in
    // half so each thread renders about the same number of frames.
    int i, maxCount, c;
    double currentTime = 0.0;
    for(i = 0, c = 0, currentTime = 0.0; i < frameLength; )
    {
        if(f0[i] < 0.0) {
            i++;
            currentTime = (double)i * framePeriod / 1000.0;
            continue;
        }
        f0[i] = (f0[i] == 0.0)? DEFAULT_F0 : f0[i];
        double T = 1.0 / f0[i];
        currentTime += T;
        i = (int)(currentTime * 1000.0 / framePeriod);
        c++;
    }
    maxCount = c;
    for(i = 0, c = 0, currentTime = 0.0; c < maxCount / 2; )
    {
        if(f0[i] < 0.0) {
            i++;
            currentTime = (double)i * framePeriod / 1000.0;
            continue;
        }
        double T = 1.0 / f0[i];
        currentTime += T;
        i = (int)(currentTime * 1000.0 / framePeriod);
        c++;
    }
    arg1.endFrame = i;
    arg2.endFrame -= i;
    arg2.beginFrame = 0;
    arg2.dynamics += i;
    arg2.f0 += i;
    arg2.frames += i;
    arg2.wave += (int)(currentTime * sampleRate);
    arg2.waveLength -= (int)(currentTime * sampleRate);
    hThread[0] = new Thread( synthesizeFromList, &arg1 );
    hThread[1] = new Thread( synthesizeFromList, &arg2 );
    hThread[0]->join();
    hThread[1]->join();
    delete hThread[0];
    delete hThread[1];
    delete hMutex;
    delete hFFTWMutex;
    hMutex = NULL;
    hFFTWMutex = NULL;
#else
#ifdef _DEBUG
    cout << "vConnect::synthesize; not STND_MULTI_THREAD" << endl;
#endif
    synthesizeFromList(&arg1);
#endif
    printf("Done: elapsed time = %f[s] for %f[s]'s synthesis.\n", (double)(clock() - cl) / CLOCKS_PER_SEC, framePeriod * frameLength / 1000.0);
    // Clip the waveform (clamp samples whose absolute value exceeds 1.0 to +/-1.0).
    for(int i = 0; i < waveLength; i++)
    {
        wave[i] = max(-1.0, min(1.0, wave[i]));
    }
    // Write the result to file.
    WaveBuffer::writeWaveFile( output, wave, waveLength, (double)beginFrame * framePeriod / 1000.0 );
    for(int i = 0; i < frameLength; i++)
    {
        list<vConnectData *>::iterator j;
        for(j = frames[i].dataList.begin(); j != frames[i].dataList.end(); j++)
        {
            delete (*j);
        }
    }
    delete[] frames;
    delete[] wave;
    delete[] f0;
    delete[] dynamics;
}
/*corpusManager::phoneme* Synthesizer::getPhoneme(string lyric, int singerIndex, vector<corpusManager *> *managers)
{
    corpusManager::phoneme *ret = NULL;
    if( singerIndex < managers->size() )
    {
        ret = (*managers)[singerIndex]->getPhoneme( lyric );
    }
    return ret;
}*/
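// Find the event that is sounding at beginFrame, returning its index and filling in
// pointers to that event, the following event, and their corresponding phonemes.
// Returns eventList.size() when no event covers beginFrame.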
int getFirstItem(
    Event **p1,
    Event **p2,
    corpusManager::phoneme **ph1,
    corpusManager::phoneme **ph2,
    Sequence *vsq,
    vector<corpusManager *> &managers,
    int beginFrame )
{
    int ret = vsq->events.eventList.size();
    for( int i = 0; i < vsq->events.eventList.size(); i++ )
    {
        if( vsq->events.eventList[i]->beginFrame <= beginFrame &&
            beginFrame < vsq->events.eventList[i]->endFrame )
        {
            *p1 = vsq->events.eventList[i];
            *p2 = (i + 1 < vsq->events.eventList.size()) ? vsq->events.eventList[i+1] : NULL;
            ret = i;
            if( *p1 )
            {
                *ph1 = managers[(*p1)->singerIndex]->getPhoneme((*p1)->lyricHandle.getLyric(), (*p1)->note);
            }
            if( *p2 )
            {
                *ph2 = managers[(*p2)->singerIndex]->getPhoneme((*p2)->lyricHandle.getLyric(), (*p2)->note);
            }
            break;
        }
    }
    return ret;
}
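// Mix the mel cepstra of all compressed-mode phonemes contributing to this frame,
// weighted by their morph ratios, into dst.  Returns the longest cepstrum length
// encountered (0 if no compressed phoneme contributed).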
int calculateMelCepstrum( float *dst, int fftLength, list<vConnectData *> &frames )
{
    int ret = 0;
    memset( dst, 0, sizeof( float ) * fftLength );
    list<vConnectData *>::iterator i;
    for( i = frames.begin(); i != frames.end(); i++ )
    {
        if( (*i)->phoneme->getMode() != VCNT_COMPRESSED )
        {
            continue;
        }
        int length;
        float *data;
        data = (*i)->phoneme->getMelCepstrum( (*i)->index, &length );
        for( int j = 0; j < length; j++ )
        {
            dst[j] += (*i)->morphRatio * data[j];
        }
        ret = max( ret, length );
    }
    return ret;
}
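// Mix the Vorbis-compressed residual waveforms of the contributing phonemes into dst,
// weighted by their morph ratios.  Each phoneme's stream keeps a one-frame cache in
// its vorbisFile entry so repeated requests for the same frame skip decoding.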
void calculateResidual(double *dst, int fftLength, list<vConnectData *> &frames, Map<vConnectPhoneme *, vorbisFile *> &vorbisMap)
{
    memset(dst, 0, sizeof(double) * fftLength);
    float **pcm_channels;
    for(list<vConnectData *>::iterator i = frames.begin(); i != frames.end(); i++)
    {
        int count = 0;
        Map<vConnectPhoneme *, vorbisFile *>::iterator itr = vorbisMap.find((*i)->phoneme);
        if(itr == vorbisMap.end())
        {
            continue;
        }
        else
        {
            // Same frame as last time: reuse the cached samples.
            if(itr->second->prevPos == (*i)->index)
            {
                for(int j = 0; j < fftLength; j++)
                {
                    dst[j] += itr->second->buf[j] * (*i)->morphRatio;
                }
                continue;
            }
            // The stream position does not match the requested index, so seek first.
            if(itr->second->pos != (*i)->index)
            {
                if(ov_pcm_seek_lap(&(itr->second->ovf), (*i)->index * fftLength))
                {
                    // Seek failed.
                    continue;
                }
            }
            while(count < fftLength)
            {
                int bitStream;
                long samples = ov_read_float(&(itr->second->ovf), &pcm_channels, fftLength - count, &bitStream);
                // Read failed.
                if(samples <= 0){ break; }
                for(int j = 0, k = count; j < samples && k < fftLength; j++, k++)
                {
                    itr->second->buf[k] = pcm_channels[0][j];
                    dst[k] += pcm_channels[0][j] * (*i)->morphRatio;
                }
                count += samples;
            }
            // Update the current and previous positions to reflect the frame just read.
            itr->second->prevPos = (*i)->index;
            itr->second->pos = (*i)->index + 1;
        }
    }
}
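// For phonemes stored as raw waveforms (VCNT_RAW), run the per-frame WORLD analysis
// and fold the results into the spectral envelope (geometric mixing via pow) and the
// residual spectrum (linear mixing), weighted by each phoneme's morph ratio.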
void calculateRawWave(double *starSpec,
                      fftw_complex *residualSpec,
                      int fftLength,
                      list<vConnectData *> &frames,
                      double *waveform,
                      fftw_complex *spectrum,
                      fftw_complex *cepstrum,
                      fftw_plan forward_r2c,
                      fftw_plan forward,
                      fftw_plan inverse)
{
    list<vConnectData *>::iterator i;
    double *tmpStar = new double[fftLength];
    fftw_complex *tmpRes = new fftw_complex[fftLength];
    double framePeriod = Configuration::getMilliSecondsPerFrame();
    for(i = frames.begin(); i != frames.end(); i++)
    {
        if((*i)->phoneme->getMode() != VCNT_RAW)
        {
            // Not stored as a raw waveform.
            continue;
        }
        (*i)->phoneme->getOneFrameWorld(tmpStar, tmpRes, (*i)->index * framePeriod / 1000.0, fftLength, waveform, spectrum, cepstrum, forward_r2c, forward, inverse);
        for(int j = 0; j < fftLength; j++)
        {
            starSpec[j] *= pow(tmpStar[j], (*i)->morphRatio);
        }
        for(int j = 0; j <= fftLength / 2; j++)
        {
            residualSpec[j][0] += tmpRes[j][0] * (*i)->morphRatio;
            residualSpec[j][1] += tmpRes[j][1] * (*i)->morphRatio;
        }
    }
    delete[] tmpRes;
    delete[] tmpStar;
}
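// Add pre-generated Gaussian noise to the first `length` samples of wave, scaled by
// `ratio`; *c keeps the read position in the shared noise table across calls.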
void appendNoise(double *wave, int length, double ratio, int *c)
{
    for(int i = 0; i < length; i++)
    {
        // Advance and wrap the noise-table index before reading so it stays in bounds.
        *c = (*c + 1) % NOISE_LEN;
        wave[i] += ratio * Synthesizer::noiseWave[*c];
    }
}
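// Thread worker that renders one region of the output.  For every pitch-synchronous
// frame it mixes the mel-cepstrum envelope, the Vorbis residual, and any raw WORLD
// frames, applies BRE noise and the gender factor, turns the power spectrum into a
// minimum-phase response, multiplies it by the excitation spectrum, and overlap-adds
// the resulting impulse response into the output buffer at the current F0 period.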
ThreadWorkerReturnType ThreadWorkerDeclspec synthesizeFromList( void *arg )
{
    vConnectArg *p = (vConnectArg *)arg;
    // Allocate the buffers FFTW needs for reconstructing the waveform.
    fftw_complex *spectrum = new fftw_complex[p->fftLength];
    fftw_complex *cepstrum = new fftw_complex[p->fftLength];
    fftw_complex *residual = new fftw_complex[p->fftLength];
    double *starSpec = new double[p->fftLength];
    double *waveform = new double[p->fftLength];
    double *impulse = new double[p->fftLength];
    float *melCepstrum = new float[p->fftLength];
    int cepstrumLength;
    // FFTW plan creation is not thread-safe.
#ifdef STND_MULTI_THREAD
    if( hFFTWMutex )
    {
        hFFTWMutex->lock();
    }
#endif
    fftw_plan forward = fftw_plan_dft_1d(p->fftLength, spectrum, cepstrum, FFTW_FORWARD, FFTW_ESTIMATE);
    fftw_plan inverse = fftw_plan_dft_1d(p->fftLength, cepstrum, spectrum, FFTW_BACKWARD, FFTW_ESTIMATE);
    fftw_plan forward_r2c = fftw_plan_dft_r2c_1d(p->fftLength, starSpec, residual, FFTW_ESTIMATE);
    fftw_plan inverse_c2r = fftw_plan_dft_c2r_1d(p->fftLength, spectrum, impulse, FFTW_ESTIMATE);
    fftw_plan forward_r2c_raw = fftw_plan_dft_r2c_1d(p->fftLength, waveform, cepstrum, FFTW_ESTIMATE);
#ifdef STND_MULTI_THREAD
    if( hFFTWMutex )
    {
        hFFTWMutex->unlock();
    }
#endif
    // Lookup map from each phoneme to its opened Vorbis stream.
    Map<vConnectPhoneme *, vorbisFile *> vorbisMap;
    for( int i = 0; i < p->phonemes->size(); i++ )
    {
        if( !(*(p->phonemes))[i] )
        {
            continue;
        }
        vorbisFile *vf = new vorbisFile(p->fftLength);
        if( (*(p->phonemes))[i]->vorbisOpen( &(vf->ovf) ) )
        {
            vorbisMap.insert( make_pair( (*(p->phonemes))[i], vf ) );
        }
        else
        {
            delete vf;
        }
    }
    //================================================================================================= end of preprocessing
    int currentPosition;
    int currentFrame = p->beginFrame;
    double currentTime = 0.0;
    double T;
    int genIndex = 0;
    int breIndex = 0;
    int noiseCount = 0;
    // Synthesis loop.
    double framePeriod = Configuration::getMilliSecondsPerFrame();
    int sampleRate = Configuration::getDefaultSampleRate();
    while( currentFrame < p->endFrame )
    {
        double currentF0;
        if(p->f0[currentFrame] < 0) {
            currentFrame++;
            currentTime = (double)currentFrame * framePeriod / 1000.0;
            continue;
        }
        currentF0 = (p->f0[currentFrame] == 0.0) ? DEFAULT_F0 : p->f0[currentFrame];
        T = 1.0 / currentF0;
        // Advance the control-track indices to the current frame.
        while( currentFrame + p->frameOffset > (*(p->controlCurves))[CurveTypeEnum::GENDER][genIndex].frameTime )
        {
            genIndex++;
        }
        while( currentFrame + p->frameOffset > (*(p->controlCurves))[CurveTypeEnum::BRETHINESS][breIndex].frameTime )
        {
            breIndex++;
        }
        /* ToDo: write the synthesized mel cepstrum into melCepstrum,
           and the synthesized residual waveform into starSpec. */
        list<vConnectData *> *frames = &(p->frames[currentFrame].dataList);
        cepstrumLength = calculateMelCepstrum( melCepstrum, p->fftLength, *frames );
        calculateResidual( starSpec, p->fftLength, *frames, vorbisMap );
        if(cepstrumLength > 0)
        {
            // Expand the mel cepstrum into the impulse (spectral envelope) buffer.
            vConnectUtility::extractMelCepstrum(
                impulse,
                p->fftLength,
                melCepstrum,
                cepstrumLength,
                spectrum,
                impulse,
                inverse_c2r,
                sampleRate );
        } else {
            for(int k = 0; k <= p->fftLength / 2; k++)
            {
                impulse[k] = 1.0;
            }
        }
        // Add noise to the excitation signal according to the BRE value.
        appendNoise( starSpec, (int)min( p->fftLength, T * sampleRate ), (*(p->controlCurves))[CurveTypeEnum::BRETHINESS][breIndex].value / 128.0, &noiseCount );
        // Run the DFT: starSpec -> residual.
        fftw_execute(forward_r2c);
        // If the synthesis unit contains raw waveforms, analyze them and add them in.
        calculateRawWave(impulse, residual, p->fftLength, *frames, waveform, spectrum, cepstrum, forward_r2c_raw, forward, inverse);
        // Write the gender-factor-stretched spectrum into starSpec.
        double stretchRatio = pow(2.0 , (double)((*(p->controlCurves))[CurveTypeEnum::GENDER][genIndex].value - 64) / 64.0);
        vConnectUtility::linearStretch(starSpec, impulse, stretchRatio, p->fftLength / 2 + 1);
        // Compute the minimum-phase response from the synthesized power spectrum.
        getMinimumPhaseSpectrum(
            starSpec,
            spectrum,
            cepstrum,
            p->fftLength,
            forward,
            inverse );
        // Multiply by the excitation spectrum in the frequency domain.
        for(int k = 0; k <= p->fftLength / 2; k++)
        {
            double real = spectrum[k][0] * residual[k][0] - spectrum[k][1] * residual[k][1];
            double imag = spectrum[k][1] * residual[k][0] + spectrum[k][0] * residual[k][1];
            spectrum[k][0] = real;
            spectrum[k][1] = imag;
        }
        // Transform back to a time-domain waveform.
        fftw_execute(inverse_c2r);
        currentPosition = (int)(currentTime * sampleRate);
        for( int k = 0; k < p->fftLength / 2 && currentPosition < p->waveLength; k++, currentPosition++ )
        {
            p->wave[currentPosition] += impulse[k] / p->fftLength * p->dynamics[currentFrame];
        }
        currentTime += T;
        currentFrame = (int)(currentTime * 1000.0 / framePeriod);
    }
    //================================================================================================= start of postprocessing
    Map<vConnectPhoneme *, vorbisFile*>::iterator i;
    for( i = vorbisMap.begin(); i != vorbisMap.end(); i++) {
        ov_clear( &(i->second->ovf) );
        delete i->second;
    }
    delete[] melCepstrum;
    delete[] impulse;
    delete[] waveform;
    delete[] starSpec;
    delete[] residual;
    delete[] cepstrum;
    delete[] spectrum;
    // FFTW plan destruction is not thread-safe.
#ifdef STND_MULTI_THREAD
    if( hFFTWMutex )
    {
        hFFTWMutex->lock();
    }
#endif
    fftw_destroy_plan( forward );
    fftw_destroy_plan( inverse );
    fftw_destroy_plan( inverse_c2r );
    fftw_destroy_plan( forward_r2c );
    fftw_destroy_plan( forward_r2c_raw );
#ifdef STND_MULTI_THREAD
    if( hFFTWMutex )
    {
        hFFTWMutex->unlock();
    }
#endif
    Thread::tellThreadEnd();
    return 0;
}
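// Expand the sequence-level information needed before synthesis: resolve singer
// events into singer indices, merge oto.ini parameters into each note, compute each
// note's begin/end frames (including pre-utterance and overlap between consecutive
// notes), determine the final end frame, and expand the control curves to frame times.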
void Synthesizer::calculateVsqInfo( void )
{
    // Shorthand to keep the code below readable.
    vector<Event *> *events = &(mVsq.events.eventList);
    string temp;
    //vector<UtauDB*> *pDBs = this->vsq.getVoiceDBs();
    UtauDB* voiceDB;
    float msPreUtterance, msVoiceOverlap;
    int singerIndex = 0;
    mEndFrame = 0;
    /////////
    // First pass: walk the events from front to back.
    double framePeriod = Configuration::getMilliSecondsPerFrame();
    for( unsigned int i = 0; i < events->size(); i++ )
    {
        Event *itemi = mVsq.events.eventList[i];
        // Check the event type.
        while( itemi->type == "Singer" )
        {
            // For a singer event, look up the singer index,
            singerIndex = mVsq.getSingerIndex( itemi->iconHandle.getIDS() );
            // then remove the event itself.
            vector<Event*>::iterator it = events->begin();
            int j = 0;
            while( it != events->end() )
            {
                if( itemi == (*it) ) break;
                j++;
                it++;
            }
            if( it != events->end() )
            {
                events->erase( it );
                SAFE_DELETE( itemi );
            }
            // (Since the i-th element was just erased, we have effectively moved on to the next one.)
            if( i >= events->size() )
            {
                break;
            }
            // On to the next note.
            itemi = mVsq.events.eventList[i];
        }
        if( singerIndex < 0 || singerIndex >= UtauDBManager::size() )
        {
            continue;
        }
        voiceDB = UtauDBManager::get( singerIndex );
        // Apply the voice-bank (oto.ini) settings.
        temp = itemi->lyricHandle.getLyric();
        msPreUtterance = itemi->utauSetting.msPreUtterance;
        msVoiceOverlap = itemi->utauSetting.msVoiceOverlap;
        voiceDB->getParams( itemi->utauSetting, temp, itemi->note );
        itemi->utauSetting.msPreUtterance = msPreUtterance;
        itemi->utauSetting.msVoiceOverlap = msVoiceOverlap;
        // A space in the lyric means a VCV phoneme.
        itemi->isVCV = ( temp.find( " " ) != string::npos );
        // Treat 'R' and 'r' as rest symbols for now and reset their UTAU parameters.
        itemi->isRest = ( temp.compare( "R" ) == 0 || temp.compare( "r" ) == 0);
        if(itemi->isRest){
            itemi->utauSetting.msPreUtterance = itemi->utauSetting.msVoiceOverlap = 0.0;
        }
        // Compute the start frame.
        itemi->beginFrame = (long)( (
            mVsq.vsqTempoBp.tickToSecond( itemi->tick ) * 1000.0 - itemi->utauSetting.msPreUtterance
            * pow( 2.0, ( 64.0 - itemi->velocity ) / 64.0 ) ) / framePeriod );
        // If the portamento length is 0%, substitute a reasonable minimum.
        if( itemi->portamentoLength < 2 )
            itemi->portamentoLength = 2;
        itemi->singerIndex = singerIndex;
    }
    /////////
    // Second pass: adjust each note against the one that follows it.
    for( unsigned int i = 0; i < events->size(); i++ ){
        // First compute the end frame from the tick time.
        (*events)[i]->endFrame = (long)(
            ( mVsq.vsqTempoBp.tickToSecond( mVsq.events.eventList[i]->tick + mVsq.events.eventList[i]->length ) * 1000.0 ) / framePeriod );
        // If there is a previous note, check for continuity.
        if( i ){
            // Notes are continuous if note i starts before note i - 1 ends.
            (*events)[i-1]->isContinuousBack = ( (*events)[i]->beginFrame <= (*events)[i-1]->endFrame );
            // Overlap settings for continuous notes.
            if( (*events)[i-1]->isContinuousBack )
            {
                // If note i is a CV phoneme,
                if( !(*events)[i]->isVCV )
                {
                    // first apply note i's pre-utterance to note i - 1,
                    (*events)[i-1]->endFrame -= (long)( (*events)[i]->utauSetting.msPreUtterance
                        * pow( 2.0, ( 64.0 - (*events)[i]->velocity ) / 64.0 ) / framePeriod );
                    // then apply the voice overlap as well.
                    (*events)[i-1]->endFrame += (long)( (*events)[i]->utauSetting.msVoiceOverlap / framePeriod );
                }
                else
                {
                    if( (*events)[i-1]->endFrame - (*events)[i-1]->beginFrame > 20 )
                    {
                        (*events)[i-1]->endFrame -= 20;
                    }
                    else
                    {
                        (*events)[i-1]->endFrame = (*events)[i-1]->beginFrame;
                    }
                }
                // Finally, as a safeguard, make sure the note did not become too short.
                if( (*events)[i-1]->endFrame < (*events)[i-1]->beginFrame )
                {
                    (*events)[i-1]->endFrame = (*events)[i-1]->beginFrame;
                }
            }
        }
    }
    for( unsigned int i = 0; i < events->size(); i++ )
    {
        if( mEndFrame < (*events)[i]->endFrame )
        {
            mEndFrame = (*events)[i]->endFrame;
        }
    }
    // Let the vsq management class expand the control curves.
    mControlCurves.resize( mVsq.controlCurves.size() );
    for( unsigned int i = 0; i < mControlCurves.size(); i++ )
    {
        mVsq.controlCurves[i].getList( mControlCurves[i], mVsq.vsqTempoBp );
    }
}
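// Fill the f0 and dynamics arrays frame by frame: empty regions get f0 = -1, voiced
// regions get the note frequency modulated by pitch bend, the pitch fluctuation and
// vibrato, and a second pass overlays portamento curves (with matching attack/decay
// dynamics) between continuous notes.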
void Synthesizer::calculateF0( double *f0, double *dynamics )
{
    double pitch_change, tmp, vibratoTheta = 0.0, vibratoRate, vibratoDepth;
    long beginFrame = mVsq.events.eventList[0]->beginFrame;
    long frameLength = mEndFrame - beginFrame;
    long index = 0;
    long portamentoBegin, portamentoLength;
    long previousEndFrame = LONG_MIN, vibratoBeginFrame = 0, noteBeginFrame;
    int pitIndex = 0, pbsIndex = 0, dynIndex = 0; // Control-curve indices.
    double framePeriod = Configuration::getMilliSecondsPerFrame();
    for( unsigned int i = 0; i < mVsq.events.eventList.size(); i++ )
    {
        Event *itemi = mVsq.events.eventList[i];
        // Fill the gap before this note with default values.
        for( ; index < itemi->beginFrame - beginFrame && index < frameLength; index++ )
        {
            f0[index] = -1.0;//0.0;
            dynamics[index] = 0.0;
        }
        // Is there a following note?
        if( !itemi->isContinuousBack )
        {
            // If not, set the portamento start position.
            portamentoBegin = itemi->endFrame - 50;
            if( portamentoBegin < itemi->beginFrame )
            {
                portamentoBegin = itemi->beginFrame;
            }
            portamentoBegin -= beginFrame;
        }
        else
        {
            // Otherwise use a start position that can never be reached.
            portamentoBegin = LONG_MAX;
        }
        // The vibrato start is computed from around the original note start; it does not need to be exact.
        if( previousEndFrame > itemi->beginFrame - beginFrame )
        {
            vibratoBeginFrame = previousEndFrame;
            vibratoBeginFrame += (long)( 1000.0 * mVsq.vsqTempoBp.tickToSecond( itemi->vibratoDelay ) / framePeriod );
        }
        else
        {
            vibratoBeginFrame = itemi->beginFrame - beginFrame;
            vibratoBeginFrame += (long)( 1000.0 * mVsq.vsqTempoBp.tickToSecond( itemi->vibratoDelay ) / framePeriod );
        }
        // Write the note pitch, vibrato, and fine fluctuation.
        for( ; index < itemi->endFrame - beginFrame && index < frameLength; index++ )
        {
            // Which of the stored pitch (etc.) curve points applies at this frame?
            while( index + beginFrame > mControlCurves[CurveTypeEnum::PITCH_BEND][pitIndex].frameTime )
            {
                pitIndex++;
            }
            while( index + beginFrame > mControlCurves[CurveTypeEnum::PITCH_BEND_SENS][pbsIndex].frameTime )
            {
                pbsIndex++;
            }
            while( index + beginFrame > mControlCurves[CurveTypeEnum::DYNAMICS][dynIndex].frameTime )
            {
                dynIndex++;
            }
            pitch_change = pow( 2, (double)mControlCurves[CurveTypeEnum::PITCH_BEND][pitIndex].value / 8192.0 * (double)mControlCurves[CurveTypeEnum::PITCH_BEND_SENS][pbsIndex].value / 12.0 );
            f0[index] = mNoteFrequency[itemi->note] * pitch_change * getPitchFluctuation( (double)index * framePeriod / 1000.0 );
            dynamics[index] = (double)mControlCurves[CurveTypeEnum::DYNAMICS][dynIndex].value / 64.0;
            if( index > portamentoBegin )
            {
                dynamics[index] *= 1.0 - (double)( index - portamentoBegin ) / 50.0;
            }
            /* Vibrato */
            if( index > vibratoBeginFrame )
            {
                double pos = (double)( index - vibratoBeginFrame ) / (double)( itemi->endFrame - beginFrame - vibratoBeginFrame );
                vibratoRate = mVibrato[itemi->vibratoHandle.getVibratoRate( pos )];
                vibratoDepth = (double)itemi->vibratoHandle.getVibratoDepth( pos ) * 2.5 / 127.0 / 2.0;
                vibratoTheta += vibratoRate * framePeriod / 1000.0;
                f0[index] *= pow( 2.0, 1.0 / 12.0 * vibratoDepth * sin( vibratoTheta ) );
                if( vibratoTheta > 2.0 * ST_PI )
                {
                    vibratoTheta -= 2.0 * ST_PI;
                }
            }
            else
            {
                vibratoTheta = 0.0;
            }
        }
        previousEndFrame = itemi->endFrame - beginFrame;
    }
    previousEndFrame = LONG_MIN;
    // Draw the portamento curves. (The order relative to vibrato actually matters.)
    for( unsigned int i = 0; i < mVsq.events.eventList.size(); i++ )
    {
        Event *itemi = mVsq.events.eventList[i];
        if( !itemi->isContinuousBack )
        {
            continue;
        }
        else
        {
            // A following note exists, so compute the parameters needed for the portamento.
            if( previousEndFrame > itemi->beginFrame )
            {
                noteBeginFrame = previousEndFrame;
            }
            else
            {
                if( itemi->isVCV )
                {
                    noteBeginFrame = (long)( mVsq.vsqTempoBp.tickToSecond( itemi->tick ) * 1000.0 / framePeriod );
                }
                else
                {
                    noteBeginFrame = itemi->beginFrame;
                }
            }
            portamentoBegin = noteBeginFrame
                + (long)((double)(itemi->endFrame - noteBeginFrame)
                    * (1.0 - (double)(itemi->portamentoLength) / 100.0));
            tmp = mNoteFrequency[mVsq.events.eventList[i + 1]->note] / mNoteFrequency[itemi->note];
        }
        portamentoLength = itemi->endFrame - portamentoBegin;
        double inv_portamentoLength = 1.0 / (double)portamentoLength;
        long frameOffset = portamentoBegin - beginFrame;
        for( long j = 0; j < portamentoLength && j + frameOffset < frameLength; j++ )
        {
            double x = (double)j * inv_portamentoLength;
            double portamentoChangeRate = (sin( ST_PI * 4.0 / 3.0 * x ) * (1.5 - x) / 1.5);
            f0[j + frameOffset] *= pow( tmp, 0.5 * (1.0 - cos( ST_PI * x )) - (double)itemi->portamentoDepth / 100.0 * portamentoChangeRate );
            dynamics[j + frameOffset] *= pow( tmp / fabs(tmp) * 3.0, -(double)itemi->decay / 100.0 * portamentoChangeRate );
        }
        for( long j = portamentoLength; j < portamentoLength * 3 / 2 && j + frameOffset < frameLength; j++ )
        {
            double x = (double)j * inv_portamentoLength;
            double portamentoChangeRate = (sin( ST_PI * 4.0 / 3.0 * x ) * (1.5 - x) / 1.5);
            f0[j + frameOffset] *= pow( tmp, -(double)itemi->portamentoDepth / 100.0 * portamentoChangeRate );
            dynamics[j + frameOffset] *= pow( tmp / fabs(tmp) * 3.0, -(double)itemi->attack / 100.0 * portamentoChangeRate );
        }
        previousEndFrame = itemi->endFrame;
    }
}