vConnectUtility.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. #include "vConnectUtility.h"
  2. #include <math.h>
  3. #include <float.h>
  4. #include <time.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <vorbis/vorbisenc.h>
  8. #include <list>
  9. using namespace std;
  10. const int maxBitrate = -1;
  11. const int minBitrate = -1;
  12. const int averageBitrate = 80000;
  13. const int bufSize = 4096;
  14. /* 許容する傾きの最大値 */
  15. const double GRAD = 2.0;
  16. /* 変形関数の傾きに対する重み */
  17. const double G_WEIGHT = 1.5;
  18. const char ENCODER_TAG[] = "vConnet";
  19. struct vorbisBuffer {
  20. char *p;
  21. int size;
  22. };
  23. void vConnectUtility::extractMelCepstrum(double *dst, int fftl, float *src, int cepl, fftw_complex *in, double *out, fftw_plan plan, int fs)
  24. {
  25. int i;
  26. for(i = 0; i < cepl; i++)
  27. {
  28. in[i][0] = src[i];
  29. in[i][1] = 0.0;
  30. }
  31. for(; i <= fftl / 2; i++)
  32. {
  33. in[i][0] = in[i][1] = 0.0;
  34. }
  35. fftw_execute(plan);
  36. for(i = 0; i <= fftl / 2; i++)
  37. {
  38. out[i] = exp(out[i]);
  39. }
  40. vConnectUtility::stretchFromMelScale(dst, out, fftl / 2 + 1, fs / 2);
  41. }
  42. void vConnectUtility::stretchToMelScale(double *melSpectrum, const double *spectrum, int spectrumLength, int maxFrequency)
  43. {
  44. double tmp = getMelScale(maxFrequency);
  45. for(int i = 0; i < spectrumLength; i++)
  46. {
  47. double dIndex = getFrequency((double)i / (double)spectrumLength * tmp) / (double)maxFrequency * (double)spectrumLength;
  48. if(dIndex <= spectrumLength-1.0){
  49. melSpectrum[i] = interpolateArray(dIndex, spectrum);
  50. }else{
  51. melSpectrum[i] = spectrum[spectrumLength - 1];
  52. }
  53. }
  54. }
  55. void vConnectUtility::stretchFromMelScale(double *spectrum, const double *melSpectrum, int spectrumLength, int maxFrequency)
  56. {
  57. double tmp = getMelScale(maxFrequency);
  58. for(int i = 0; i < spectrumLength; i++)
  59. {
  60. double dIndex = getMelScale((double)i / (double)spectrumLength * (double)maxFrequency) / tmp * (double)spectrumLength;
  61. if(dIndex <= spectrumLength-1.0){
  62. spectrum[i] = interpolateArray(dIndex, melSpectrum);
  63. }else{
  64. spectrum[i] = melSpectrum[spectrumLength - 1];
  65. }
  66. }
  67. }
  68. void vConnectUtility::linearStretch(double *dst, const double *src, double ratio, int length)
  69. {
  70. for(int i = 0; i < length; i++)
  71. {
  72. double dIndex = (double)i * ratio;
  73. int iIndex = (int)dIndex;
  74. double r = dIndex - (double)iIndex;
  75. if(iIndex < 0 || iIndex >= length - 1) {
  76. dst[i] = 0.0;
  77. } else {
  78. dst[i] = pow(src[iIndex], 1.0 - r) * pow(src[iIndex], r);
  79. }
  80. }
  81. }
  82. double vConnectUtility::interpolateArray( double x, const double *p )
  83. {
  84. int t = (int)x;
  85. double r = x - (double)t;
  86. return ( p[t] * ( 1.0 - r ) + p[t+1] * r );
  87. }
  88. double vConnectUtility::getMelScale(double freq){
  89. double ret = 1127.01048 * log(1 + freq / 700);
  90. return ret;
  91. }
  92. double vConnectUtility::getFrequency(double melScale){
  93. double ret = 700 * (exp(melScale / 1127.01048) - 1);
  94. return ret;
  95. }
  96. void vConnectUtility::extractResidual(fftw_complex *dst, const double *src, int fftLength)
  97. {
  98. dst[0][0] = src[0];
  99. dst[0][1] = 0.0;
  100. for(int i = 1; i < fftLength / 2; i++) {
  101. dst[i][0] = src[2*i-1];
  102. dst[i][1] = src[2*i];
  103. }
  104. dst[fftLength/2][0] = src[fftLength-1];
  105. dst[fftLength/2][1] = 0.0;
  106. }
  107. void vConnectUtility::newOggVorbis(char **dst, int *size, const double *wave, int length, int fs)
  108. {
  109. if(!dst || !size || !wave || length <= 0){ return; }
  110. list<vorbisBuffer*> bufferList;
  111. vorbisBuffer *p;
  112. int eos = 0;
  113. ogg_stream_state streamState;
  114. ogg_page page;
  115. ogg_packet packet, header, headerComment, headerCode;
  116. vorbis_info info;
  117. vorbis_comment comment;
  118. vorbis_dsp_state dspState;
  119. vorbis_block block;
  120. vorbis_info_init(&info);
  121. if(vorbis_encode_init(&info, 1, fs, maxBitrate, averageBitrate, minBitrate))
  122. {
  123. vorbis_info_clear(&info);
  124. return;
  125. }
  126. vorbis_comment_init(&comment);
  127. vorbis_comment_add_tag(&comment, "ENCODER", (char *)ENCODER_TAG);
  128. vorbis_analysis_init(&dspState, &info);
  129. vorbis_block_init(&dspState, &block);
  130. srand(time(NULL));
  131. ogg_stream_init(&streamState, rand());
  132. vorbis_analysis_headerout(&dspState, &comment, &header, &headerComment, &headerCode);
  133. ogg_stream_packetin(&streamState, &header);
  134. ogg_stream_packetin(&streamState, &headerComment);
  135. ogg_stream_packetin(&streamState, &headerCode);
  136. while(!eos){
  137. int bytes = ogg_stream_flush(&streamState, &page);
  138. if(bytes == 0){ break; }
  139. p = new vorbisBuffer;
  140. p->size = page.header_len + page.body_len;
  141. p->p = new char[p->size];
  142. memcpy(p->p, page.header, page.header_len);
  143. memcpy(p->p + page.header_len, page.body, page.body_len);
  144. bufferList.push_back(p);
  145. }
  146. long i, pos = 0;
  147. while(!eos){
  148. float **buffer = vorbis_analysis_buffer(&dspState, bufSize);
  149. for(i = 0; i < bufSize && pos < length; i++, pos++){
  150. buffer[0][i] = wave[pos];
  151. }
  152. vorbis_analysis_wrote(&dspState, i);
  153. while(vorbis_analysis_blockout(&dspState, &block) == 1){
  154. vorbis_analysis(&block, NULL);
  155. vorbis_bitrate_addblock(&block);
  156. while(vorbis_bitrate_flushpacket(&dspState, &packet)){
  157. ogg_stream_packetin(&streamState, &packet);
  158. while(!eos){
  159. int result = ogg_stream_pageout(&streamState, &page);
  160. if(result == 0)break;
  161. p = new vorbisBuffer;
  162. p->size = page.header_len + page.body_len;
  163. p->p = new char[p->size];
  164. memcpy(p->p, page.header, page.header_len);
  165. memcpy(p->p + page.header_len, page.body, page.body_len);
  166. bufferList.push_back(p);
  167. if(ogg_page_eos(&page)){
  168. eos = 1;
  169. }
  170. }
  171. }
  172. }
  173. }
  174. ogg_stream_clear(&streamState);
  175. vorbis_block_clear(&block);
  176. vorbis_dsp_clear(&dspState);
  177. vorbis_comment_clear(&comment);
  178. vorbis_info_clear(&info);
  179. *size = 0;
  180. for(list<vorbisBuffer*>::iterator i = bufferList.begin(); i != bufferList.end(); i++) {
  181. *size += (*i)->size;
  182. }
  183. *dst = new char[*size];
  184. char *cur = *dst;
  185. for(list<vorbisBuffer*>::iterator i = bufferList.begin(); i != bufferList.end(); i++) {
  186. // Endian 依存.
  187. memcpy(cur, (*i)->p, (*i)->size);
  188. cur += (*i)->size;
  189. delete[] (*i)->p;
  190. delete (*i);
  191. }
  192. }
  193. void vConnectUtility::_get_graduation( double* src, double* dst, int length,
  194. double *d1s, double *d2s, double *d1d, double *d2d)
  195. {
  196. /* 導関数の数値を計算 */
  197. double *temp_spectrum = (double*)malloc(sizeof(double) * length);
  198. memset( temp_spectrum,0,sizeof(double)*length );
  199. for( int i = 1; i < length; i++ )
  200. temp_spectrum[i] = fabs( dst[i] - dst[i-1] );
  201. for( int i = 0; i < length - 1; i++ )
  202. d1d[i] = 0.5 * ( temp_spectrum[i] + temp_spectrum[i+1] );
  203. d1d[length-1] = 0.0;
  204. for( int i = 1; i < length; i++ )
  205. temp_spectrum[i] = fabs( d1d[i] - d1d[i-1] );
  206. for( int i = 0; i < length - 1; i++ )
  207. d2d[i] = 0.5 * ( temp_spectrum[i] + temp_spectrum[i+1] );
  208. d2d[length-1] = 0.0;
  209. for( int i = 1; i < length; i++ )
  210. temp_spectrum[i] = fabs( src[i] - src[i-1] );
  211. for( int i = 0; i < length - 1; i++ )
  212. d1s[i] = 0.5 * ( temp_spectrum[i] + temp_spectrum[i+1] );
  213. d1s[length-1] = 0.0;
  214. for( int i = 1; i < length; i++ )
  215. temp_spectrum[i] = fabs( d1s[i] - d1s[i-1] );
  216. for( int i = 0; i < length - 1; i++ )
  217. d2s[i] = 0.5 * ( temp_spectrum[i] + temp_spectrum[i+1] );
  218. d2s[length-1] = 0.0;
  219. free(temp_spectrum);
  220. }
  221. void vConnectUtility::_log_normalize_infinite( double* dst, int length )
  222. {
  223. /* 正規化の結果は0~1の範囲になる. */
  224. int i;
  225. double min_v = DBL_MAX, max_v = DBL_MIN;
  226. for( i = 0; i < length; i++ ){
  227. if( dst[i] < min_v )
  228. min_v = dst[i];
  229. if( dst[i] > max_v )
  230. max_v = dst[i];
  231. }
  232. for( i = 0; i < length; i++ )
  233. dst[i] = log( dst[i] / min_v ) / log( max_v );
  234. }
  235. double vConnectUtility::_get_cost( double* src, double* dst, int i, int j, int n, int m,
  236. double *d1d, double *d2d, double *d1s, double *d2s)
  237. {
  238. int k;
  239. double temp, rate, sum;
  240. if( n > m * GRAD || m > n * GRAD )
  241. return DBL_MAX;
  242. rate = temp = (double)n / (double)m;
  243. sum = 0.0;
  244. if( rate > 1.0 )
  245. rate = 1.0 / rate;
  246. if( m >= n ){
  247. for( k = 1; k <= m; k++ ){
  248. sum += fabs( interpolateArray( (double)i + (double)k * temp, src ) - dst[j+k] );
  249. sum += fabs( interpolateArray( (double)i + (double)k * temp, d1s ) / temp - d1d[j+k] );
  250. sum += 0.5 * fabs( interpolateArray( (double)i + (double)k * temp, d2s ) / pow( temp, 2.0 ) - d2d[j+k] );
  251. }
  252. temp = m;
  253. temp = sqrt( (double)( m*m + n*n ) ) / temp;
  254. sum *= temp;
  255. }else{
  256. for( k = 1; k <= n; k++ ){
  257. sum += fabs( src[i+k] - interpolateArray( (double)j + (double)k / temp, dst ) );
  258. sum += fabs( d1s[i+k] - interpolateArray( (double)j + (double)k / temp, d1d ) * temp );
  259. sum += 0.5 * fabs( d2s[i+k] - interpolateArray( (double)j + (double)k / temp, d2d ) * pow( temp, 2.0 ) );
  260. }
  261. temp = n;
  262. temp = sqrt( (double)( m*m + n*n ) ) / temp;
  263. sum *= temp;
  264. }
  265. rate = 1.0 + G_WEIGHT * pow( 1.0 - rate, 2.0 );
  266. return sum * rate;
  267. }
  268. void vConnectUtility::calculateMatching( double* T, double* H, double* src_s, double* dst_s, int length )
  269. {
  270. int i, j, k, l, n, m, tx, ty;
  271. double **dpMap;
  272. double tempd, g1, g2;
  273. int **pathX, **pathY;
  274. double* src = (double*)malloc( sizeof(double)*length );
  275. double* dst = (double*)malloc( sizeof(double)*length );
  276. double *d1d = (double*)malloc( sizeof(double)*length );
  277. double *d2d = (double*)malloc( sizeof(double)*length );
  278. double *d1s = (double*)malloc( sizeof(double)*length );
  279. double *d2s = (double*)malloc( sizeof(double)*length );
  280. memcpy( src, src_s, sizeof(double)*length );
  281. memcpy( dst, dst_s, sizeof(double)*length );
  282. /* メモリの確保と値の初期化 */
  283. dpMap = (double **)malloc( sizeof(double *) * length );
  284. pathX = (int **)malloc( sizeof(int *) * length );
  285. pathY = (int **)malloc( sizeof(int *) * length );
  286. for( i = 0; i < length; i++ ){
  287. dpMap[i] = (double *)malloc( sizeof(double) * 1100); //length );
  288. for( j = 0; j < 1100; j++ ) //length; j++ )
  289. dpMap[i][j] = 1000000000.0;
  290. pathX[i] = (int *)malloc( sizeof(int) * 1100); //length );
  291. pathY[i] = (int *)malloc( sizeof(int) * 1100); //length );
  292. memset( pathX[i], -1, sizeof(int) * 1100); //length );
  293. memset( pathY[i], -1, sizeof(int) * 1100); //length );
  294. }
  295. pathX[0][512] = pathY[0][512] = 0;
  296. dpMap[0][512] = fabs( dst[0] - src[0] );
  297. /* 導関数を算出 */
  298. _get_graduation( src, dst, length, d1s, d2s, d1d, d2d );
  299. /* 最短経路を求めてマッチング */
  300. for( i = 0; i < length; i++ ){
  301. for( j = 0; j < length; j++ ){
  302. if( abs(i - j) > 48 ) /* 領域を直指定するなんて... */
  303. continue;
  304. /* 現在点における傾き */
  305. tx = pathX[i][j-i+512]; ty = pathY[i][j-i+512];
  306. if( i == tx || j == ty )
  307. g1 = 1.0;
  308. else
  309. g1 = (double)( j - ty ) / (double)( i - tx );
  310. /* 分割はあまり増やしても意味が無いみたい */
  311. for( n = 1; n < 8; n++ ){
  312. if( i + n >= length )
  313. break;
  314. for( m = 1; m < 8; m++ ){
  315. if( j + m >= length )
  316. break;
  317. /* 目標点における傾き */
  318. g2 = (double)m / (double)n;
  319. if( g2 > g1 )
  320. tempd = dpMap[i][j-i+512] + _get_cost( src, dst, i, j, n, m, d1d, d2d, d1s, d2s ) * g2 / g1;
  321. else
  322. tempd = dpMap[i][j-i+512] + _get_cost( src, dst, i, j, n, m, d1d, d2d, d1s, d2s ) * g1 / g2;
  323. /* 最大値の更新 */
  324. if( i + n < length && j + m < length && dpMap[i+n][j+m-(i+n)+512] > tempd ){
  325. dpMap[i+n][j+m-(i+n)+512] = tempd;
  326. pathX[i+n][j+m-(i+n)+512] = i;
  327. pathY[i+n][j+m-(i+n)+512] = j;
  328. }
  329. }
  330. }
  331. }
  332. }
  333. T[0] = 0.0;
  334. l = j = length - 1;
  335. while( l > 0 && j > 0 ){
  336. tx = pathX[l][j-l+512]; ty = pathY[l][j-l+512];
  337. for( k = l; k > tx; k-- )
  338. T[k] = (double)j - (double)( l - k ) * (double)( j - ty ) / (double)( l - tx );
  339. l = tx; j = ty;
  340. }
  341. H[0] = 0.0;
  342. l = j = length - 1;
  343. while( l > 0 && j > 0 ){
  344. tx = pathX[l][j-l+512]; ty = pathY[l][j-l+512];
  345. for( k = j; k > ty; k-- )
  346. H[k] = (double)j - (double)( j - k ) * (double)( l - tx ) / (double)( j - ty );
  347. l = tx; j = ty;
  348. }
  349. /* if(H){
  350. applyStretching( T, dst, length );
  351. for( i = 0; i < length; i++ )
  352. H[i] = src[i] - dst[i];
  353. }
  354. */
  355. /* メモリのお掃除 */
  356. for( i = 0; i < length; i++ ){
  357. free( dpMap[i] );
  358. free( pathX[i] );
  359. free( pathY[i] );
  360. }
  361. free( dpMap );
  362. free( pathX );
  363. free( pathY );
  364. free( src );
  365. free( dst );
  366. free( d1d );
  367. free( d2d );
  368. free( d1s );
  369. free( d2s );
  370. }