harvest.cpp 51 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263
  1. //-----------------------------------------------------------------------------
  2. // Copyright 2012 Masanori Morise
  3. // Author: mmorise [at] yamanashi.ac.jp (Masanori Morise)
  4. // Last update: 2018/01/21
  5. //
  6. // F0 estimation based on Harvest.
  7. //-----------------------------------------------------------------------------
  8. #include "world/harvest.h"
  9. #include <math.h>
  10. #include "world/common.h"
  11. #include "world/constantnumbers.h"
  12. #include "world/fft.h"
  13. #include "world/matlabfunctions.h"
  14. //-----------------------------------------------------------------------------
  15. // struct for RawEventByHarvest()
  16. // "negative" means "zero-crossing point going from positive to negative"
  17. // "positive" means "zero-crossing point going from negative to positive"
  18. //-----------------------------------------------------------------------------
  19. typedef struct {
  20. double *negative_interval_locations;
  21. double *negative_intervals;
  22. int number_of_negatives;
  23. double *positive_interval_locations;
  24. double *positive_intervals;
  25. int number_of_positives;
  26. double *peak_interval_locations;
  27. double *peak_intervals;
  28. int number_of_peaks;
  29. double *dip_interval_locations;
  30. double *dip_intervals;
  31. int number_of_dips;
  32. } ZeroCrossings;
  33. namespace {
  34. //-----------------------------------------------------------------------------
  35. // Since the waveform of beginning and ending after decimate include noise,
  36. // the input waveform is extended. This is the processing for the
  37. // compatibility with MATLAB version.
  38. //-----------------------------------------------------------------------------
  39. static void GetWaveformAndSpectrumSub(const double *x, int x_length,
  40. int y_length, double actual_fs, int decimation_ratio, double *y) {
  41. if (decimation_ratio == 1) {
  42. for (int i = 0; i < x_length; ++i) y[i] = x[i];
  43. return;
  44. }
  45. int lag =
  46. static_cast<int>(ceil(140.0 / decimation_ratio) * decimation_ratio);
  47. int new_x_length = x_length + lag * 2;
  48. double *new_y = new double[new_x_length];
  49. for (int i = 0; i < new_x_length; ++i) new_y[i] = 0.0;
  50. double *new_x = new double[new_x_length];
  51. for (int i = 0; i < lag; ++i) new_x[i] = x[0];
  52. for (int i = lag; i < lag + x_length; ++i) new_x[i] = x[i - lag];
  53. for (int i = lag + x_length; i < new_x_length; ++i)
  54. new_x[i] = x[x_length - 1];
  55. decimate(new_x, new_x_length, decimation_ratio, new_y);
  56. for (int i = 0; i < y_length; ++i) y[i] = new_y[lag / decimation_ratio + i];
  57. delete[] new_x;
  58. delete[] new_y;
  59. }
  60. //-----------------------------------------------------------------------------
  61. // GetWaveformAndSpectrum() calculates the downsampled signal and its spectrum
  62. //-----------------------------------------------------------------------------
  63. static void GetWaveformAndSpectrum(const double *x, int x_length,
  64. int y_length, double actual_fs, int fft_size, int decimation_ratio,
  65. double *y, fft_complex *y_spectrum) {
  66. // Initialization
  67. for (int i = 0; i < fft_size; ++i) y[i] = 0.0;
  68. // Processing for the compatibility with MATLAB version
  69. GetWaveformAndSpectrumSub(x, x_length, y_length, actual_fs,
  70. decimation_ratio, y);
  71. // Removal of the DC component (y = y - mean value of y)
  72. double mean_y = 0.0;
  73. for (int i = 0; i < y_length; ++i) mean_y += y[i];
  74. mean_y /= y_length;
  75. for (int i = 0; i < y_length; ++i) y[i] -= mean_y;
  76. for (int i = y_length; i < fft_size; ++i) y[i] = 0.0;
  77. fft_plan forwardFFT =
  78. fft_plan_dft_r2c_1d(fft_size, y, y_spectrum, FFT_ESTIMATE);
  79. fft_execute(forwardFFT);
  80. fft_destroy_plan(forwardFFT);
  81. }
  82. //-----------------------------------------------------------------------------
  83. // GetFilteredSignal() calculates the signal that is the convolution of the
  84. // input signal and band-pass filter.
  85. //-----------------------------------------------------------------------------
  86. static void GetFilteredSignal(double boundary_f0, int fft_size, double fs,
  87. const fft_complex *y_spectrum, int y_length, double *filtered_signal) {
  88. int filter_length_half = matlab_round(fs / boundary_f0 * 2.0);
  89. double *band_pass_filter = new double[fft_size];
  90. NuttallWindow(filter_length_half * 2 + 1, band_pass_filter);
  91. for (int i = -filter_length_half; i <= filter_length_half; ++i)
  92. band_pass_filter[i + filter_length_half] *=
  93. cos(2 * world::kPi * boundary_f0 * i / fs);
  94. for (int i = filter_length_half * 2 + 1; i < fft_size; ++i)
  95. band_pass_filter[i] = 0.0;
  96. fft_complex *band_pass_filter_spectrum = new fft_complex[fft_size];
  97. fft_plan forwardFFT = fft_plan_dft_r2c_1d(fft_size, band_pass_filter,
  98. band_pass_filter_spectrum, FFT_ESTIMATE);
  99. fft_execute(forwardFFT);
  100. // Convolution
  101. double tmp = y_spectrum[0][0] * band_pass_filter_spectrum[0][0] -
  102. y_spectrum[0][1] * band_pass_filter_spectrum[0][1];
  103. band_pass_filter_spectrum[0][1] =
  104. y_spectrum[0][0] * band_pass_filter_spectrum[0][1] +
  105. y_spectrum[0][1] * band_pass_filter_spectrum[0][0];
  106. band_pass_filter_spectrum[0][0] = tmp;
  107. for (int i = 1; i <= fft_size / 2; ++i) {
  108. tmp = y_spectrum[i][0] * band_pass_filter_spectrum[i][0] -
  109. y_spectrum[i][1] * band_pass_filter_spectrum[i][1];
  110. band_pass_filter_spectrum[i][1] =
  111. y_spectrum[i][0] * band_pass_filter_spectrum[i][1] +
  112. y_spectrum[i][1] * band_pass_filter_spectrum[i][0];
  113. band_pass_filter_spectrum[i][0] = tmp;
  114. band_pass_filter_spectrum[fft_size - i - 1][0] =
  115. band_pass_filter_spectrum[i][0];
  116. band_pass_filter_spectrum[fft_size - i - 1][1] =
  117. band_pass_filter_spectrum[i][1];
  118. }
  119. fft_plan inverseFFT = fft_plan_dft_c2r_1d(fft_size,
  120. band_pass_filter_spectrum, filtered_signal, FFT_ESTIMATE);
  121. fft_execute(inverseFFT);
  122. // Compensation of the delay.
  123. int index_bias = filter_length_half + 1;
  124. for (int i = 0; i < y_length; ++i)
  125. filtered_signal[i] = filtered_signal[i + index_bias];
  126. fft_destroy_plan(inverseFFT);
  127. fft_destroy_plan(forwardFFT);
  128. delete[] band_pass_filter_spectrum;
  129. delete[] band_pass_filter;
  130. }
  131. //-----------------------------------------------------------------------------
  132. // CheckEvent() returns 1, provided that the input value is over 1.
  133. // This function is for RawEventByDio().
  134. //-----------------------------------------------------------------------------
  135. static inline int CheckEvent(int x) {
  136. return x > 0 ? 1 : 0;
  137. }
  138. //-----------------------------------------------------------------------------
  139. // ZeroCrossingEngine() calculates the zero crossing points from positive to
  140. // negative.
  141. //-----------------------------------------------------------------------------
  142. static int ZeroCrossingEngine(const double *filtered_signal, int y_length,
  143. double fs, double *interval_locations, double *intervals) {
  144. int *negative_going_points = new int[y_length];
  145. for (int i = 0; i < y_length - 1; ++i)
  146. negative_going_points[i] =
  147. 0.0 < filtered_signal[i] && filtered_signal[i + 1] <= 0.0 ? i + 1 : 0;
  148. negative_going_points[y_length - 1] = 0;
  149. int *edges = new int[y_length];
  150. int count = 0;
  151. for (int i = 0; i < y_length; ++i)
  152. if (negative_going_points[i] > 0)
  153. edges[count++] = negative_going_points[i];
  154. if (count < 2) {
  155. delete[] edges;
  156. delete[] negative_going_points;
  157. return 0;
  158. }
  159. double *fine_edges = new double[count];
  160. for (int i = 0; i < count; ++i)
  161. fine_edges[i] = edges[i] - filtered_signal[edges[i] - 1] /
  162. (filtered_signal[edges[i]] - filtered_signal[edges[i] - 1]);
  163. for (int i = 0; i < count - 1; ++i) {
  164. intervals[i] = fs / (fine_edges[i + 1] - fine_edges[i]);
  165. interval_locations[i] = (fine_edges[i] + fine_edges[i + 1]) / 2.0 / fs;
  166. }
  167. delete[] fine_edges;
  168. delete[] edges;
  169. delete[] negative_going_points;
  170. return count - 1;
  171. }
  172. //-----------------------------------------------------------------------------
  173. // GetFourZeroCrossingIntervals() calculates four zero-crossing intervals.
  174. // (1) Zero-crossing going from negative to positive.
  175. // (2) Zero-crossing going from positive to negative.
  176. // (3) Peak, and (4) dip. (3) and (4) are calculated from the zero-crossings of
  177. // the differential of waveform.
  178. //-----------------------------------------------------------------------------
  179. static void GetFourZeroCrossingIntervals(double *filtered_signal, int y_length,
  180. double actual_fs, ZeroCrossings *zero_crossings) {
  181. int maximum_number = y_length;
  182. zero_crossings->negative_interval_locations = new double[maximum_number];
  183. zero_crossings->positive_interval_locations = new double[maximum_number];
  184. zero_crossings->peak_interval_locations = new double[maximum_number];
  185. zero_crossings->dip_interval_locations = new double[maximum_number];
  186. zero_crossings->negative_intervals = new double[maximum_number];
  187. zero_crossings->positive_intervals = new double[maximum_number];
  188. zero_crossings->peak_intervals = new double[maximum_number];
  189. zero_crossings->dip_intervals = new double[maximum_number];
  190. zero_crossings->number_of_negatives = ZeroCrossingEngine(filtered_signal,
  191. y_length, actual_fs, zero_crossings->negative_interval_locations,
  192. zero_crossings->negative_intervals);
  193. for (int i = 0; i < y_length; ++i) filtered_signal[i] = -filtered_signal[i];
  194. zero_crossings->number_of_positives = ZeroCrossingEngine(filtered_signal,
  195. y_length, actual_fs, zero_crossings->positive_interval_locations,
  196. zero_crossings->positive_intervals);
  197. for (int i = 0; i < y_length - 1; ++i) filtered_signal[i] =
  198. filtered_signal[i] - filtered_signal[i + 1];
  199. zero_crossings->number_of_peaks = ZeroCrossingEngine(filtered_signal,
  200. y_length - 1, actual_fs, zero_crossings->peak_interval_locations,
  201. zero_crossings->peak_intervals);
  202. for (int i = 0; i < y_length - 1; ++i)
  203. filtered_signal[i] = -filtered_signal[i];
  204. zero_crossings->number_of_dips = ZeroCrossingEngine(filtered_signal,
  205. y_length - 1, actual_fs, zero_crossings->dip_interval_locations,
  206. zero_crossings->dip_intervals);
  207. }
  208. static void GetF0CandidateContourSub(const double * const *interpolated_f0_set,
  209. int f0_length, double f0_floor, double f0_ceil, double boundary_f0,
  210. double *f0_candidate) {
  211. double upper = boundary_f0 * 1.1;
  212. double lower = boundary_f0 * 0.9;
  213. for (int i = 0; i < f0_length; ++i) {
  214. f0_candidate[i] = (interpolated_f0_set[0][i] +
  215. interpolated_f0_set[1][i] + interpolated_f0_set[2][i] +
  216. interpolated_f0_set[3][i]) / 4.0;
  217. if (f0_candidate[i] > upper || f0_candidate[i] < lower ||
  218. f0_candidate[i] > f0_ceil || f0_candidate[i] < f0_floor)
  219. f0_candidate[i] = 0.0;
  220. }
  221. }
  222. //-----------------------------------------------------------------------------
  223. // GetF0CandidateContour() calculates the F0 candidate contour in 1-ch signal.
  224. // Calculation of F0 candidates is carried out in GetF0CandidatesSub().
  225. //-----------------------------------------------------------------------------
  226. static void GetF0CandidateContour(const ZeroCrossings *zero_crossings,
  227. double boundary_f0, double f0_floor, double f0_ceil,
  228. const double *temporal_positions, int f0_length, double *f0_candidate) {
  229. if (0 == CheckEvent(zero_crossings->number_of_negatives - 2) *
  230. CheckEvent(zero_crossings->number_of_positives - 2) *
  231. CheckEvent(zero_crossings->number_of_peaks - 2) *
  232. CheckEvent(zero_crossings->number_of_dips - 2)) {
  233. for (int i = 0; i < f0_length; ++i) f0_candidate[i] = 0.0;
  234. return;
  235. }
  236. double *interpolated_f0_set[4];
  237. for (int i = 0; i < 4; ++i)
  238. interpolated_f0_set[i] = new double[f0_length];
  239. interp1(zero_crossings->negative_interval_locations,
  240. zero_crossings->negative_intervals,
  241. zero_crossings->number_of_negatives,
  242. temporal_positions, f0_length, interpolated_f0_set[0]);
  243. interp1(zero_crossings->positive_interval_locations,
  244. zero_crossings->positive_intervals,
  245. zero_crossings->number_of_positives,
  246. temporal_positions, f0_length, interpolated_f0_set[1]);
  247. interp1(zero_crossings->peak_interval_locations,
  248. zero_crossings->peak_intervals, zero_crossings->number_of_peaks,
  249. temporal_positions, f0_length, interpolated_f0_set[2]);
  250. interp1(zero_crossings->dip_interval_locations,
  251. zero_crossings->dip_intervals, zero_crossings->number_of_dips,
  252. temporal_positions, f0_length, interpolated_f0_set[3]);
  253. GetF0CandidateContourSub(interpolated_f0_set, f0_length, f0_floor,
  254. f0_ceil, boundary_f0, f0_candidate);
  255. for (int i = 0; i < 4; ++i) delete[] interpolated_f0_set[i];
  256. }
  257. //-----------------------------------------------------------------------------
  258. // DestroyZeroCrossings() frees the memory of array in the struct
  259. //-----------------------------------------------------------------------------
  260. static void DestroyZeroCrossings(ZeroCrossings *zero_crossings) {
  261. delete[] zero_crossings->negative_interval_locations;
  262. delete[] zero_crossings->positive_interval_locations;
  263. delete[] zero_crossings->peak_interval_locations;
  264. delete[] zero_crossings->dip_interval_locations;
  265. delete[] zero_crossings->negative_intervals;
  266. delete[] zero_crossings->positive_intervals;
  267. delete[] zero_crossings->peak_intervals;
  268. delete[] zero_crossings->dip_intervals;
  269. }
  270. //-----------------------------------------------------------------------------
  271. // GetF0CandidateFromRawEvent() f0 candidate contour in 1-ch signal
  272. //-----------------------------------------------------------------------------
  273. static void GetF0CandidateFromRawEvent(double boundary_f0, double fs,
  274. const fft_complex *y_spectrum, int y_length, int fft_size, double f0_floor,
  275. double f0_ceil, const double *temporal_positions, int f0_length,
  276. double *f0_candidate) {
  277. double *filtered_signal = new double[fft_size];
  278. GetFilteredSignal(boundary_f0, fft_size, fs, y_spectrum,
  279. y_length, filtered_signal);
  280. ZeroCrossings zero_crossings = { 0 };
  281. GetFourZeroCrossingIntervals(filtered_signal, y_length, fs,
  282. &zero_crossings);
  283. GetF0CandidateContour(&zero_crossings, boundary_f0, f0_floor, f0_ceil,
  284. temporal_positions, f0_length, f0_candidate);
  285. DestroyZeroCrossings(&zero_crossings);
  286. delete[] filtered_signal;
  287. }
  288. //-----------------------------------------------------------------------------
  289. // GetRawF0Candidates() calculates f0 candidates in all channels.
  290. //-----------------------------------------------------------------------------
  291. static void GetRawF0Candidates(const double *boundary_f0_list,
  292. int number_of_bands, double actual_fs, int y_length,
  293. const double *temporal_positions, int f0_length,
  294. const fft_complex *y_spectrum, int fft_size, double f0_floor,
  295. double f0_ceil, double **raw_f0_candidates) {
  296. for (int i = 0; i < number_of_bands; ++i)
  297. GetF0CandidateFromRawEvent(boundary_f0_list[i], actual_fs, y_spectrum,
  298. y_length, fft_size, f0_floor, f0_ceil, temporal_positions, f0_length,
  299. raw_f0_candidates[i]);
  300. }
  301. //-----------------------------------------------------------------------------
  302. // DetectF0CandidatesSub1() calculates VUV areas.
  303. //-----------------------------------------------------------------------------
  304. static int DetectOfficialF0CandidatesSub1(const int *vuv,
  305. int number_of_channels, int *st, int *ed) {
  306. int number_of_voiced_sections = 0;
  307. int tmp;
  308. for (int i = 1; i < number_of_channels; ++i) {
  309. tmp = vuv[i] - vuv[i - 1];
  310. if (tmp == 1) st[number_of_voiced_sections] = i;
  311. if (tmp == -1) ed[number_of_voiced_sections++] = i;
  312. }
  313. return number_of_voiced_sections;
  314. }
  315. //-----------------------------------------------------------------------------
  316. // DetectOfficialF0CandidatesSub2() calculates F0 candidates in a frame
  317. //-----------------------------------------------------------------------------
  318. static int DetectOfficialF0CandidatesSub2(const int *vuv,
  319. const double * const *raw_f0_candidates, int index,
  320. int number_of_voiced_sections, const int *st, const int *ed,
  321. int max_candidates, double *f0_list) {
  322. int number_of_candidates = 0;
  323. double tmp_f0;
  324. for (int i = 0; i < number_of_voiced_sections; ++i) {
  325. if (ed[i] - st[i] < 10) continue;
  326. tmp_f0 = 0.0;
  327. for (int j = st[i]; j < ed[i]; ++j)
  328. tmp_f0 += raw_f0_candidates[j][index];
  329. tmp_f0 /= (ed[i] - st[i]);
  330. f0_list[number_of_candidates++] = tmp_f0;
  331. }
  332. for (int i = number_of_candidates; i < max_candidates; ++i) f0_list[i] = 0.0;
  333. return number_of_candidates;
  334. }
  335. //-----------------------------------------------------------------------------
  336. // DetectOfficialF0Candidates() detectes F0 candidates from multi-channel
  337. // candidates.
  338. //-----------------------------------------------------------------------------
  339. static int DetectOfficialF0Candidates(const double * const * raw_f0_candidates,
  340. int number_of_channels, int f0_length, int max_candidates,
  341. double **f0_candidates) {
  342. int number_of_candidates = 0;
  343. int *vuv = new int[number_of_channels];
  344. int *st = new int[number_of_channels];
  345. int *ed = new int[number_of_channels];
  346. int number_of_voiced_sections;
  347. for (int i = 0; i < f0_length; ++i) {
  348. for (int j = 0; j < number_of_channels; ++j)
  349. vuv[j] = raw_f0_candidates[j][i] > 0 ? 1 : 0;
  350. vuv[0] = vuv[number_of_channels - 1] = 0;
  351. number_of_voiced_sections = DetectOfficialF0CandidatesSub1(vuv,
  352. number_of_channels, st, ed);
  353. number_of_candidates = MyMaxInt(number_of_candidates,
  354. DetectOfficialF0CandidatesSub2(vuv, raw_f0_candidates, i,
  355. number_of_voiced_sections, st, ed, max_candidates, f0_candidates[i]));
  356. }
  357. delete[] vuv;
  358. delete[] st;
  359. delete[] ed;
  360. return number_of_candidates;
  361. }
  362. //-----------------------------------------------------------------------------
  363. // OverlapF0Candidates() spreads the candidates to anteroposterior frames.
  364. //-----------------------------------------------------------------------------
  365. static void OverlapF0Candidates(int f0_length, int number_of_candidates,
  366. double **f0_candidates) {
  367. int n = 3;
  368. for (int i = 1; i <= n; ++i)
  369. for (int j = 0; j < number_of_candidates; ++j) {
  370. for (int k = i; k < f0_length; ++k)
  371. f0_candidates[k][j + (number_of_candidates * i)] =
  372. f0_candidates[k - i][j];
  373. for (int k = 0; k < f0_length - i; ++k)
  374. f0_candidates[k][j + (number_of_candidates * (i + n))] =
  375. f0_candidates[k + i][j];
  376. }
  377. }
  378. //-----------------------------------------------------------------------------
  379. // GetBaseIndex() calculates the temporal positions for windowing.
  380. //-----------------------------------------------------------------------------
  381. static void GetBaseIndex(double current_position, const double *base_time,
  382. int base_time_length, double fs, int *base_index) {
  383. // First-aid treatment
  384. int basic_index =
  385. matlab_round((current_position + base_time[0]) * fs + 0.001);
  386. for (int i = 0; i < base_time_length; ++i) base_index[i] = basic_index + i;
  387. }
  388. //-----------------------------------------------------------------------------
  389. // GetMainWindow() generates the window function.
  390. //-----------------------------------------------------------------------------
  391. static void GetMainWindow(double current_position, const int *base_index,
  392. int base_time_length, double fs, double window_length_in_time,
  393. double *main_window) {
  394. double tmp = 0.0;
  395. for (int i = 0; i < base_time_length; ++i) {
  396. tmp = (base_index[i] - 1.0) / fs - current_position;
  397. main_window[i] = 0.42 +
  398. 0.5 * cos(2.0 * world::kPi * tmp / window_length_in_time) +
  399. 0.08 * cos(4.0 * world::kPi * tmp / window_length_in_time);
  400. }
  401. }
  402. //-----------------------------------------------------------------------------
  403. // GetDiffWindow() generates the differentiated window.
  404. // Diff means differential.
  405. //-----------------------------------------------------------------------------
  406. static void GetDiffWindow(const double *main_window, int base_time_length,
  407. double *diff_window) {
  408. diff_window[0] = -main_window[1] / 2.0;
  409. for (int i = 1; i < base_time_length - 1; ++i)
  410. diff_window[i] = -(main_window[i + 1] - main_window[i - 1]) / 2.0;
  411. diff_window[base_time_length - 1] = main_window[base_time_length - 2] / 2.0;
  412. }
  413. //-----------------------------------------------------------------------------
  414. // GetSpectra() calculates two spectra of the waveform windowed by windows
  415. // (main window and diff window).
  416. //-----------------------------------------------------------------------------
  417. static void GetSpectra(const double *x, int x_length, int fft_size,
  418. const int *base_index, const double *main_window,
  419. const double *diff_window, int base_time_length,
  420. const ForwardRealFFT *forward_real_fft, fft_complex *main_spectrum,
  421. fft_complex *diff_spectrum) {
  422. int *safe_index = new int[base_time_length];
  423. for (int i = 0; i < base_time_length; ++i)
  424. safe_index[i] = MyMaxInt(0, MyMinInt(x_length - 1, base_index[i] - 1));
  425. for (int i = 0; i < base_time_length; ++i)
  426. forward_real_fft->waveform[i] = x[safe_index[i]] * main_window[i];
  427. for (int i = base_time_length; i < fft_size; ++i)
  428. forward_real_fft->waveform[i] = 0.0;
  429. fft_execute(forward_real_fft->forward_fft);
  430. for (int i = 0; i <= fft_size / 2; ++i) {
  431. main_spectrum[i][0] = forward_real_fft->spectrum[i][0];
  432. main_spectrum[i][1] = forward_real_fft->spectrum[i][1];
  433. }
  434. for (int i = 0; i < base_time_length; ++i)
  435. forward_real_fft->waveform[i] = x[safe_index[i]] * diff_window[i];
  436. for (int i = base_time_length; i < fft_size; ++i)
  437. forward_real_fft->waveform[i] = 0.0;
  438. fft_execute(forward_real_fft->forward_fft);
  439. for (int i = 0; i <= fft_size / 2; ++i) {
  440. diff_spectrum[i][0] = forward_real_fft->spectrum[i][0];
  441. diff_spectrum[i][1] = forward_real_fft->spectrum[i][1];
  442. }
  443. delete[] safe_index;
  444. }
  445. static void FixF0(const double *power_spectrum, const double *numerator_i,
  446. int fft_size, double fs, double current_f0, int number_of_harmonics,
  447. double *refined_f0, double *score) {
  448. double *amplitude_list = new double[number_of_harmonics];
  449. double *instantaneous_frequency_list = new double[number_of_harmonics];
  450. int index;
  451. for (int i = 0; i < number_of_harmonics; ++i) {
  452. index = matlab_round(current_f0 * fft_size / fs * (i + 1));
  453. instantaneous_frequency_list[i] = power_spectrum[index] == 0.0 ? 0.0 :
  454. static_cast<double>(index) * fs / fft_size +
  455. numerator_i[index] / power_spectrum[index] * fs / 2.0 / world::kPi;
  456. amplitude_list[i] = sqrt(power_spectrum[index]);
  457. }
  458. double denominator = 0.0;
  459. double numerator = 0.0;
  460. *score = 0.0;
  461. for (int i = 0; i < number_of_harmonics; ++i) {
  462. numerator += amplitude_list[i] * instantaneous_frequency_list[i];
  463. denominator += amplitude_list[i] * (i + 1.0);
  464. *score += fabs((instantaneous_frequency_list[i] / (i + 1.0) - current_f0) /
  465. current_f0);
  466. }
  467. *refined_f0 = numerator / (denominator + world::kMySafeGuardMinimum);
  468. *score = 1.0 / (*score / number_of_harmonics + world::kMySafeGuardMinimum);
  469. delete[] amplitude_list;
  470. delete[] instantaneous_frequency_list;
  471. }
  472. //-----------------------------------------------------------------------------
  473. // GetMeanF0() calculates the instantaneous frequency.
  474. //-----------------------------------------------------------------------------
  475. static void GetMeanF0(const double *x, int x_length, double fs,
  476. double current_position, double current_f0, int fft_size,
  477. double window_length_in_time, const double *base_time,
  478. int base_time_length, double *refined_f0, double *refined_score) {
  479. ForwardRealFFT forward_real_fft = { 0 };
  480. InitializeForwardRealFFT(fft_size, &forward_real_fft);
  481. fft_complex *main_spectrum = new fft_complex[fft_size];
  482. fft_complex *diff_spectrum = new fft_complex[fft_size];
  483. int *base_index = new int[base_time_length];
  484. double *main_window = new double[base_time_length];
  485. double *diff_window = new double[base_time_length];
  486. GetBaseIndex(current_position, base_time, base_time_length, fs, base_index);
  487. GetMainWindow(current_position, base_index, base_time_length, fs,
  488. window_length_in_time, main_window);
  489. GetDiffWindow(main_window, base_time_length, diff_window);
  490. GetSpectra(x, x_length, fft_size, base_index, main_window, diff_window,
  491. base_time_length, &forward_real_fft, main_spectrum, diff_spectrum);
  492. double *power_spectrum = new double[fft_size / 2 + 1];
  493. double *numerator_i = new double[fft_size / 2 + 1];
  494. for (int j = 0; j <= fft_size / 2; ++j) {
  495. numerator_i[j] = main_spectrum[j][0] * diff_spectrum[j][1] -
  496. main_spectrum[j][1] * diff_spectrum[j][0];
  497. power_spectrum[j] = main_spectrum[j][0] * main_spectrum[j][0] +
  498. main_spectrum[j][1] * main_spectrum[j][1];
  499. }
  500. int number_of_harmonics =
  501. MyMinInt(static_cast<int>(fs / 2.0 / current_f0), 6);
  502. FixF0(power_spectrum, numerator_i, fft_size, fs, current_f0,
  503. number_of_harmonics, refined_f0, refined_score);
  504. delete[] diff_spectrum;
  505. delete[] diff_window;
  506. delete[] main_window;
  507. delete[] base_index;
  508. delete[] numerator_i;
  509. delete[] power_spectrum;
  510. delete[] main_spectrum;
  511. DestroyForwardRealFFT(&forward_real_fft);
  512. }
  513. //-----------------------------------------------------------------------------
  514. // GetRefinedF0() calculates F0 and its score based on instantaneous frequency.
  515. //-----------------------------------------------------------------------------
  516. static void GetRefinedF0(const double *x, int x_length, double fs,
  517. double current_position, double current_f0, double f0_floor, double f0_ceil,
  518. double *refined_f0, double *refined_score) {
  519. if (current_f0 <= 0.0) {
  520. *refined_f0 = 0.0;
  521. *refined_score = 0.0;
  522. return;
  523. }
  524. int half_window_length = static_cast<int>(1.5 * fs / current_f0 + 1.0);
  525. double window_length_in_time = (2.0 * half_window_length + 1.0) / fs;
  526. double *base_time = new double[half_window_length * 2 + 1];
  527. for (int i = 0; i < half_window_length * 2 + 1; i++)
  528. base_time[i] = (-half_window_length + i) / fs;
  529. int fft_size = static_cast<int>(pow(2.0, 2.0 +
  530. static_cast<int>(log(half_window_length * 2.0 + 1.0) / world::kLog2)));
  531. GetMeanF0(x, x_length, fs, current_position, current_f0, fft_size,
  532. window_length_in_time, base_time, half_window_length * 2 + 1,
  533. refined_f0, refined_score);
  534. if (*refined_f0 < f0_floor || *refined_f0 > f0_ceil ||
  535. *refined_score < 2.5) {
  536. *refined_f0 = 0.0;
  537. *refined_score = 0.0;
  538. }
  539. delete[] base_time;
  540. }
  541. //-----------------------------------------------------------------------------
  542. // RefineF0() modifies the F0 by instantaneous frequency.
  543. //-----------------------------------------------------------------------------
  544. static void RefineF0Candidates(const double *x, int x_length, double fs,
  545. const double *temporal_positions, int f0_length, int max_candidates,
  546. double f0_floor, double f0_ceil,
  547. double **refined_f0_candidates, double **f0_scores) {
  548. for (int i = 0; i < f0_length; i++)
  549. for (int j = 0; j < max_candidates; ++j)
  550. GetRefinedF0(x, x_length, fs, temporal_positions[i],
  551. refined_f0_candidates[i][j], f0_floor, f0_ceil,
  552. &refined_f0_candidates[i][j], &f0_scores[i][j]);
  553. }
  554. //-----------------------------------------------------------------------------
  555. // SelectBestF0() obtains the nearlest F0 in reference_f0.
  556. //-----------------------------------------------------------------------------
  557. static double SelectBestF0(double reference_f0, const double *f0_candidates,
  558. int number_of_candidates, double allowed_range, double *best_error) {
  559. double best_f0 = 0.0;
  560. *best_error = allowed_range;
  561. double tmp;
  562. for (int i = 0; i < number_of_candidates; ++i) {
  563. tmp = fabs(reference_f0 - f0_candidates[i]) / reference_f0;
  564. if (tmp > *best_error) continue;
  565. best_f0 = f0_candidates[i];
  566. *best_error = tmp;
  567. }
  568. return best_f0;
  569. }
  570. static void RemoveUnreliableCandidatesSub(int i, int j,
  571. const double * const *tmp_f0_candidates, int number_of_candidates,
  572. double **f0_candidates, double **f0_scores) {
  573. double reference_f0 = f0_candidates[i][j];
  574. double error1, error2, min_error;
  575. double threshold = 0.05;
  576. if (reference_f0 == 0) return;
  577. SelectBestF0(reference_f0, tmp_f0_candidates[i + 1],
  578. number_of_candidates, 1.0, &error1);
  579. SelectBestF0(reference_f0, tmp_f0_candidates[i - 1],
  580. number_of_candidates, 1.0, &error2);
  581. min_error = MyMinDouble(error1, error2);
  582. if (min_error <= threshold) return;
  583. f0_candidates[i][j] = 0;
  584. f0_scores[i][j] = 0;
  585. }
  586. //-----------------------------------------------------------------------------
  587. // RemoveUnreliableCandidates().
  588. //-----------------------------------------------------------------------------
  589. static void RemoveUnreliableCandidates(int f0_length, int number_of_candidates,
  590. double **f0_candidates, double **f0_scores) {
  591. double **tmp_f0_candidates = new double *[f0_length];
  592. for (int i = 0; i < f0_length; ++i)
  593. tmp_f0_candidates[i] = new double[number_of_candidates];
  594. for (int i = 0; i < f0_length; ++i)
  595. for (int j = 0; j < number_of_candidates; ++j)
  596. tmp_f0_candidates[i][j] = f0_candidates[i][j];
  597. for (int i = 1; i < f0_length - 1; ++i)
  598. for (int j = 0; j < number_of_candidates; ++j)
  599. RemoveUnreliableCandidatesSub(i, j, tmp_f0_candidates,
  600. number_of_candidates, f0_candidates, f0_scores);
  601. for (int i = 0; i < f0_length; ++i) delete[] tmp_f0_candidates[i];
  602. delete[] tmp_f0_candidates;
  603. }
  604. //-----------------------------------------------------------------------------
  605. // SearchF0Base() gets the F0 with the highest score.
  606. //-----------------------------------------------------------------------------
  607. static void SearchF0Base(const double * const *f0_candidates,
  608. const double * const *f0_scores, int f0_length, int number_of_candidates,
  609. double *base_f0_contour) {
  610. double tmp_best_score;
  611. for (int i = 0; i < f0_length; ++i) {
  612. base_f0_contour[i] = tmp_best_score = 0.0;
  613. for (int j = 0; j < number_of_candidates; ++j)
  614. if (f0_scores[i][j] > tmp_best_score) {
  615. base_f0_contour[i] = f0_candidates[i][j];
  616. tmp_best_score = f0_scores[i][j];
  617. }
  618. }
  619. }
  620. //-----------------------------------------------------------------------------
  621. // Step 1: Rapid change of F0 contour is replaced by 0.
  622. //-----------------------------------------------------------------------------
  623. static void FixStep1(const double *f0_base, int f0_length,
  624. double allowed_range, double *f0_step1) {
  625. for (int i = 0; i < f0_length; ++i) f0_step1[i] = 0.0;
  626. double reference_f0;
  627. for (int i = 2; i < f0_length; ++i) {
  628. if (f0_base[i] == 0.0) continue;
  629. reference_f0 = f0_base[i - 1] * 2 - f0_base[i - 2];
  630. f0_step1[i] =
  631. fabs((f0_base[i] - reference_f0) / reference_f0) > allowed_range &&
  632. fabs((f0_base[i] - f0_base[i - 1])) / f0_base[i - 1] > allowed_range ?
  633. 0.0 : f0_base[i];
  634. }
  635. }
  636. //-----------------------------------------------------------------------------
  637. // GetBoundaryList() detects boundaries between voiced and unvoiced sections.
  638. //-----------------------------------------------------------------------------
  639. static int GetBoundaryList(const double *f0, int f0_length,
  640. int *boundary_list) {
  641. int number_of_boundaries = 0;
  642. int *vuv = new int[f0_length];
  643. for (int i = 0; i < f0_length; ++i)
  644. vuv[i] = f0[i] > 0 ? 1 : 0;
  645. vuv[0] = vuv[f0_length - 1] = 0;
  646. for (int i = 1; i < f0_length; ++i)
  647. if (vuv[i] - vuv[i - 1] != 0) {
  648. boundary_list[number_of_boundaries] = i - number_of_boundaries % 2;
  649. number_of_boundaries++;
  650. }
  651. delete[] vuv;
  652. return number_of_boundaries;
  653. }
  654. //-----------------------------------------------------------------------------
  655. // Step 2: Voiced sections with a short period are removed.
  656. //-----------------------------------------------------------------------------
  657. static void FixStep2(const double *f0_step1, int f0_length,
  658. int voice_range_minimum, double *f0_step2) {
  659. for (int i = 0; i < f0_length; ++i) f0_step2[i] = f0_step1[i];
  660. int *boundary_list = new int[f0_length];
  661. int number_of_boundaries =
  662. GetBoundaryList(f0_step1, f0_length, boundary_list);
  663. for (int i = 0; i < number_of_boundaries / 2; ++i) {
  664. if (boundary_list[i * 2 + 1] - boundary_list[i * 2] >= voice_range_minimum)
  665. continue;
  666. for (int j = boundary_list[i * 2]; j <= boundary_list[(i * 2) + 1]; ++j)
  667. f0_step2[j] = 0.0;
  668. }
  669. delete[] boundary_list;
  670. }
  671. //-----------------------------------------------------------------------------
  672. // GetMultiChannelF0() separates each voiced section into independent channel.
  673. //-----------------------------------------------------------------------------
  674. static void GetMultiChannelF0(const double *f0, int f0_length,
  675. const int *boundary_list, int number_of_boundaries,
  676. double **multi_channel_f0) {
  677. for (int i = 0; i < number_of_boundaries / 2; ++i) {
  678. for (int j = 0; j < boundary_list[i * 2]; ++j)
  679. multi_channel_f0[i][j] = 0.0;
  680. for (int j = boundary_list[i * 2]; j <= boundary_list[i * 2 + 1]; ++j)
  681. multi_channel_f0[i][j] = f0[j];
  682. for (int j = boundary_list[i * 2 + 1] + 1; j < f0_length; ++j)
  683. multi_channel_f0[i][j] = 0.0;
  684. }
  685. }
  686. //-----------------------------------------------------------------------------
  687. // abs() often causes bugs, an original function is used.
  688. //-----------------------------------------------------------------------------
  689. static inline int MyAbsInt(int x) {
  690. return x > 0 ? x : -x;
  691. }
  692. //-----------------------------------------------------------------------------
  693. // ExtendF0() : The Hand erasing the Space.
  694. // The subfunction of Extend().
  695. //-----------------------------------------------------------------------------
  696. static int ExtendF0(const double *f0, int f0_length, int origin,
  697. int last_point, int shift, const double * const *f0_candidates,
  698. int number_of_candidates, double allowed_range, double *extended_f0) {
  699. int threshold = 4;
  700. double tmp_f0 = extended_f0[origin];
  701. int shifted_origin = origin;
  702. int distance = MyAbsInt(last_point - origin);
  703. int *index_list = new int[distance + 1];
  704. for (int i = 0; i <= distance; ++i) index_list[i] = origin + shift * i;
  705. int count = 0;
  706. double dammy;
  707. for (int i = 0; i <= distance; ++i) {
  708. extended_f0[index_list[i] + shift] =
  709. SelectBestF0(tmp_f0, f0_candidates[index_list[i] + shift],
  710. number_of_candidates, allowed_range, &dammy);
  711. if (extended_f0[index_list[i] + shift] == 0.0) {
  712. count++;
  713. } else {
  714. tmp_f0 = extended_f0[index_list[i] + shift];
  715. count = 0;
  716. shifted_origin = index_list[i] + shift;
  717. }
  718. if (count == threshold) break;
  719. }
  720. delete[] index_list;
  721. return shifted_origin;
  722. }
  723. //-----------------------------------------------------------------------------
  724. // Swap the f0 contour and boundary.
  725. // It is used in ExtendSub() and MergeF0();
  726. //-----------------------------------------------------------------------------
  727. static void Swap(int index1, int index2, double **f0, int *boundary) {
  728. double *tmp_pointer;
  729. int tmp_index;
  730. tmp_pointer = f0[index1];
  731. f0[index1] = f0[index2];
  732. f0[index2] = tmp_pointer;
  733. tmp_index = boundary[index1 * 2];
  734. boundary[index1 * 2] = boundary[index2 * 2];
  735. boundary[index2 * 2] = tmp_index;
  736. tmp_index = boundary[index1 * 2 + 1];
  737. boundary[index1 * 2 + 1] = boundary[index2 * 2 + 1];
  738. boundary[index2 * 2 + 1] = tmp_index;
  739. }
  740. static int ExtendSub(const double * const *extended_f0,
  741. const int *boundary_list, int number_of_sections,
  742. double **selected_extended_f0, int *selected_boundary_list) {
  743. double threshold = 2200.0;
  744. int count = 0;
  745. double mean_f0 = 0.0;
  746. int st, ed;
  747. for (int i = 0; i < number_of_sections; ++i) {
  748. st = boundary_list[i * 2];
  749. ed = boundary_list[i * 2 + 1];
  750. for (int j = st; j < ed; ++j) mean_f0 += extended_f0[i][j];
  751. mean_f0 /= ed - st;
  752. if (threshold / mean_f0 < ed - st)
  753. Swap(count++, i, selected_extended_f0, selected_boundary_list);
  754. }
  755. return count;
  756. }
  757. //-----------------------------------------------------------------------------
  758. // Extend() : The Hand erasing the Space.
  759. //-----------------------------------------------------------------------------
  760. static int Extend(const double * const *multi_channel_f0,
  761. int number_of_sections, int f0_length, const int *boundary_list,
  762. const double * const *f0_candidates, int number_of_candidates,
  763. double allowed_range, double **extended_f0, int *shifted_boundary_list) {
  764. int threshold = 100;
  765. for (int i = 0; i < number_of_sections; ++i) {
  766. shifted_boundary_list[i * 2 + 1] = ExtendF0(multi_channel_f0[i],
  767. f0_length, boundary_list[i * 2 + 1],
  768. MyMinInt(f0_length - 2, boundary_list[i * 2 + 1] + threshold), 1,
  769. f0_candidates, number_of_candidates, allowed_range, extended_f0[i]);
  770. shifted_boundary_list[i * 2] = ExtendF0(multi_channel_f0[i], f0_length,
  771. boundary_list[i * 2], MyMaxInt(1, boundary_list[i * 2] - threshold), -1,
  772. f0_candidates, number_of_candidates, allowed_range, extended_f0[i]);
  773. }
  774. return ExtendSub(multi_channel_f0, shifted_boundary_list,
  775. number_of_sections, extended_f0, shifted_boundary_list);
  776. }
  777. //-----------------------------------------------------------------------------
  778. // Indices are sorted.
  779. //-----------------------------------------------------------------------------
  780. static void MakeSortedOrder(const int *boundary_list, int number_of_sections,
  781. int *order) {
  782. for (int i = 0; i < number_of_sections; ++i) order[i] = i;
  783. int tmp;
  784. for (int i = 1; i < number_of_sections; ++i)
  785. for (int j = i - 1; j >= 0; --j)
  786. if (boundary_list[order[j] * 2] > boundary_list[order[i] * 2]) {
  787. tmp = order[i];
  788. order[i] = order[j];
  789. order[j] = tmp;
  790. } else {
  791. break;
  792. }
  793. }
  794. //-----------------------------------------------------------------------------
  795. // Serach the highest score with the candidate F0.
  796. //-----------------------------------------------------------------------------
  797. static double SearchScore(double f0, const double *f0_candidates,
  798. const double *f0_scores, int number_of_candidates) {
  799. double score = 0.0;
  800. for (int i = 0; i < number_of_candidates; ++i)
  801. if (f0 == f0_candidates[i] && score < f0_scores[i]) score = f0_scores[i];
  802. return score;
  803. }
  804. //-----------------------------------------------------------------------------
  805. // Subfunction of MergeF0()
  806. //-----------------------------------------------------------------------------
  807. static int MergeF0Sub(const double *f0_1, int f0_length, int st1, int ed1,
  808. const double *f0_2, int st2, int ed2, const double * const *f0_candidates,
  809. const double * const *f0_scores, int number_of_candidates,
  810. double *merged_f0) {
  811. if (st1 <= st2 && ed1 >= ed2) return ed1;
  812. double score1 = 0.0;
  813. double score2 = 0.0;
  814. for (int i = st2; i <= ed1; ++i) {
  815. score1 += SearchScore(f0_1[i], f0_candidates[i], f0_scores[i],
  816. number_of_candidates);
  817. score2 += SearchScore(f0_2[i], f0_candidates[i], f0_scores[i],
  818. number_of_candidates);
  819. }
  820. if (score1 > score2)
  821. for (int i = ed1; i <= ed2; ++i) merged_f0[i] = f0_2[i];
  822. else
  823. for (int i = st2; i <= ed2; ++i) merged_f0[i] = f0_2[i];
  824. return ed2;
  825. }
  826. //-----------------------------------------------------------------------------
  827. // Overlapped F0 contours are merged by the likability score.
  828. //-----------------------------------------------------------------------------
  829. static void MergeF0(const double * const *multi_channel_f0, int *boundary_list,
  830. int number_of_channels, int f0_length, const double * const *f0_candidates,
  831. const double * const *f0_scores, int number_of_candidates,
  832. double *merged_f0) {
  833. int *order = new int[number_of_channels];
  834. MakeSortedOrder(boundary_list, number_of_channels, order);
  835. for (int i = 0; i < f0_length; ++i)
  836. merged_f0[i] = multi_channel_f0[0][i];
  837. for (int i = 1; i < number_of_channels; ++i)
  838. if (boundary_list[order[i] * 2] - boundary_list[1] > 0) {
  839. for (int j = boundary_list[order[i] * 2];
  840. j <= boundary_list[order[i] * 2 + 1]; ++j)
  841. merged_f0[j] = multi_channel_f0[order[i]][j];
  842. boundary_list[0] = boundary_list[order[i] * 2];
  843. boundary_list[1] = boundary_list[order[i] * 2 + 1];
  844. } else {
  845. boundary_list[1] =
  846. MergeF0Sub(merged_f0, f0_length, boundary_list[0], boundary_list[1],
  847. multi_channel_f0[order[i]], boundary_list[order[i] * 2],
  848. boundary_list[order[i] * 2 + 1], f0_candidates, f0_scores,
  849. number_of_candidates, merged_f0);
  850. }
  851. delete[] order;
  852. }
  853. //-----------------------------------------------------------------------------
  854. // Step 3: Voiced sections are extended based on the continuity of F0 contour
  855. //-----------------------------------------------------------------------------
  856. static void FixStep3(const double *f0_step2, int f0_length,
  857. int number_of_candidates, const double * const *f0_candidates,
  858. double allowed_range, const double * const *f0_scores, double *f0_step3) {
  859. for (int i = 0; i < f0_length; ++i) f0_step3[i] = f0_step2[i];
  860. int *boundary_list = new int[f0_length];
  861. int number_of_boundaries =
  862. GetBoundaryList(f0_step2, f0_length, boundary_list);
  863. double **multi_channel_f0 = new double *[number_of_boundaries / 2];
  864. for (int i = 0; i < number_of_boundaries / 2; ++i)
  865. multi_channel_f0[i] = new double[f0_length];
  866. GetMultiChannelF0(f0_step2, f0_length, boundary_list, number_of_boundaries,
  867. multi_channel_f0);
  868. int number_of_channels =
  869. Extend(multi_channel_f0, number_of_boundaries / 2, f0_length,
  870. boundary_list, f0_candidates, number_of_candidates, allowed_range,
  871. multi_channel_f0, boundary_list);
  872. if (number_of_channels != 0)
  873. MergeF0(multi_channel_f0, boundary_list, number_of_channels, f0_length,
  874. f0_candidates, f0_scores, number_of_candidates, f0_step3);
  875. for (int i = 0; i < number_of_boundaries / 2; ++i)
  876. delete[] multi_channel_f0[i];
  877. delete[] multi_channel_f0;
  878. delete[] boundary_list;
  879. }
  880. //-----------------------------------------------------------------------------
  881. // Step 4: F0s in short unvoiced section are faked
  882. //-----------------------------------------------------------------------------
  883. static void FixStep4(const double *f0_step3, int f0_length, int threshold,
  884. double *f0_step4) {
  885. for (int i = 0; i < f0_length; ++i) f0_step4[i] = f0_step3[i];
  886. int *boundary_list = new int[f0_length];
  887. int number_of_boundaries =
  888. GetBoundaryList(f0_step3, f0_length, boundary_list);
  889. int distance;
  890. double tmp0, tmp1, coefficient;
  891. int count;
  892. for (int i = 0; i < number_of_boundaries / 2 - 1; ++i) {
  893. distance = boundary_list[(i + 1) * 2] - boundary_list[i * 2 + 1] - 1;
  894. if (distance >= threshold) continue;
  895. tmp0 = f0_step3[boundary_list[i * 2 + 1]] + 1;
  896. tmp1 = f0_step3[boundary_list[(i + 1) * 2]] - 1;
  897. coefficient = (tmp1 - tmp0) / (distance + 1.0);
  898. count = 1;
  899. for (int j = boundary_list[i * 2 + 1] + 1;
  900. j <= boundary_list[(i + 1) * 2] - 1; ++j)
  901. f0_step4[j] = tmp0 + coefficient * count++;
  902. }
  903. delete[] boundary_list;
  904. }
  905. //-----------------------------------------------------------------------------
  906. // FixF0Contour() obtains the likely F0 contour.
  907. //-----------------------------------------------------------------------------
  908. static void FixF0Contour(const double * const *f0_candidates,
  909. const double * const *f0_scores, int f0_length, int number_of_candidates,
  910. double *best_f0_contour) {
  911. double *tmp_f0_contour1 = new double[f0_length];
  912. double *tmp_f0_contour2 = new double[f0_length];
  913. // These parameters are optimized by speech databases.
  914. SearchF0Base(f0_candidates, f0_scores, f0_length,
  915. number_of_candidates, tmp_f0_contour1);
  916. FixStep1(tmp_f0_contour1, f0_length, 0.008, tmp_f0_contour2);
  917. FixStep2(tmp_f0_contour2, f0_length, 6, tmp_f0_contour1);
  918. FixStep3(tmp_f0_contour1, f0_length, number_of_candidates, f0_candidates,
  919. 0.18, f0_scores, tmp_f0_contour2);
  920. FixStep4(tmp_f0_contour2, f0_length, 9, best_f0_contour);
  921. delete[] tmp_f0_contour1;
  922. delete[] tmp_f0_contour2;
  923. }
  924. //-----------------------------------------------------------------------------
  925. // This function uses zero-lag Butterworth filter.
  926. //-----------------------------------------------------------------------------
  927. static void FilteringF0(const double *a, const double *b, double *x,
  928. int x_length, int st, int ed, double *y) {
  929. double w[2] = { 0.0, 0.0 };
  930. double wt;
  931. double *tmp_x = new double[x_length];
  932. for (int i = 0; i < st; ++i) x[i] = x[st];
  933. for (int i = ed + 1; i < x_length; ++i) x[i] = x[ed];
  934. for (int i = 0; i < x_length; ++i) {
  935. wt = x[i] + a[0] * w[0] + a[1] * w[1];
  936. tmp_x[x_length - i - 1] = b[0] * wt + b[1] * w[0] + b[0] * w[1];
  937. w[1] = w[0];
  938. w[0] = wt;
  939. }
  940. w[0] = w[1] = 0.0;
  941. for (int i = 0; i < x_length; ++i) {
  942. wt = tmp_x[i] + a[0] * w[0] + a[1] * w[1];
  943. y[x_length - i - 1] = b[0] * wt + b[1] * w[0] + b[0] * w[1];
  944. w[1] = w[0];
  945. w[0] = wt;
  946. }
  947. delete[] tmp_x;
  948. }
  949. //-----------------------------------------------------------------------------
  950. // SmoothF0Contour() uses the zero-lag Butterworth filter for smoothing.
  951. //-----------------------------------------------------------------------------
  952. static void SmoothF0Contour(const double *f0, int f0_length,
  953. double *smoothed_f0) {
  954. const double b[2] =
  955. { 0.0078202080334971724, 0.015640416066994345 };
  956. const double a[2] =
  957. { 1.7347257688092754, -0.76600660094326412 };
  958. int lag = 300;
  959. int new_f0_length = f0_length + lag * 2;
  960. double *f0_contour = new double[new_f0_length];
  961. for (int i = 0; i < lag; ++i) f0_contour[i] = 0.0;
  962. for (int i = lag; i < lag + f0_length; ++i) f0_contour[i] = f0[i - lag];
  963. for (int i = lag + f0_length; i < new_f0_length; ++i) f0_contour[i] = 0.0;
  964. int *boundary_list = new int[new_f0_length];
  965. int number_of_boundaries =
  966. GetBoundaryList(f0_contour, new_f0_length, boundary_list);
  967. double **multi_channel_f0 = new double *[number_of_boundaries / 2];
  968. for (int i = 0; i < number_of_boundaries / 2; ++i)
  969. multi_channel_f0[i] = new double[new_f0_length];
  970. GetMultiChannelF0(f0_contour, new_f0_length, boundary_list,
  971. number_of_boundaries, multi_channel_f0);
  972. for (int i = 0; i < number_of_boundaries / 2; ++i) {
  973. FilteringF0(a, b, multi_channel_f0[i], new_f0_length,
  974. boundary_list[i * 2], boundary_list[i * 2 + 1], f0_contour);
  975. for (int j = boundary_list[i * 2]; j <= boundary_list[i * 2 + 1]; ++j)
  976. smoothed_f0[j - lag] = f0_contour[j];
  977. }
  978. for (int i = 0; i < number_of_boundaries / 2; ++i)
  979. delete[] multi_channel_f0[i];
  980. delete[] multi_channel_f0;
  981. delete[] f0_contour;
  982. delete[] boundary_list;
  983. }
  984. //-----------------------------------------------------------------------------
  985. // HarvestGeneralBodySub() is the subfunction of HarvestGeneralBody()
  986. //-----------------------------------------------------------------------------
  987. static int HarvestGeneralBodySub(const double *boundary_f0_list,
  988. int number_of_channels, int f0_length, double actual_fs, int y_length,
  989. const double *temporal_positions, const fft_complex *y_spectrum,
  990. int fft_size, double f0_floor, double f0_ceil, int max_candidates,
  991. double **f0_candidates) {
  992. double **raw_f0_candidates = new double *[number_of_channels];
  993. for (int i = 0; i < number_of_channels; ++i)
  994. raw_f0_candidates[i] = new double[f0_length];
  995. GetRawF0Candidates(boundary_f0_list, number_of_channels,
  996. actual_fs, y_length, temporal_positions, f0_length, y_spectrum,
  997. fft_size, f0_floor, f0_ceil, raw_f0_candidates);
  998. int number_of_candidates = DetectOfficialF0Candidates(raw_f0_candidates,
  999. number_of_channels, f0_length, max_candidates, f0_candidates);
  1000. OverlapF0Candidates(f0_length, number_of_candidates, f0_candidates);
  1001. for (int i = 0; i < number_of_channels; ++i)
  1002. delete[] raw_f0_candidates[i];
  1003. delete[] raw_f0_candidates;
  1004. return number_of_candidates;
  1005. }
  1006. //-----------------------------------------------------------------------------
  1007. // HarvestGeneralBody() estimates the F0 contour based on Harvest.
  1008. //-----------------------------------------------------------------------------
  1009. static void HarvestGeneralBody(const double *x, int x_length, int fs,
  1010. int frame_period, double f0_floor, double f0_ceil,
  1011. double channels_in_octave, int speed, double *temporal_positions,
  1012. double *f0) {
  1013. double adjusted_f0_floor = f0_floor * 0.9;
  1014. double adjusted_f0_ceil = f0_ceil * 1.1;
  1015. int number_of_channels =
  1016. 1 + static_cast<int>(log(adjusted_f0_ceil / adjusted_f0_floor) /
  1017. world::kLog2 * channels_in_octave);
  1018. double *boundary_f0_list = new double[number_of_channels];
  1019. for (int i = 0; i < number_of_channels; ++i)
  1020. boundary_f0_list[i] =
  1021. adjusted_f0_floor * pow(2.0, (i + 1) / channels_in_octave);
  1022. // normalization
  1023. int decimation_ratio = MyMaxInt(MyMinInt(speed, 12), 1);
  1024. int y_length =
  1025. static_cast<int>(ceil(static_cast<double>(x_length) / decimation_ratio));
  1026. double actual_fs = static_cast<double>(fs) / decimation_ratio;
  1027. int fft_size = GetSuitableFFTSize(y_length + 5 +
  1028. 2 * static_cast<int>(2.0 * actual_fs / boundary_f0_list[0]));
  1029. // Calculation of the spectrum used for the f0 estimation
  1030. double *y = new double[fft_size];
  1031. fft_complex *y_spectrum = new fft_complex[fft_size];
  1032. GetWaveformAndSpectrum(x, x_length, y_length, actual_fs, fft_size,
  1033. decimation_ratio, y, y_spectrum);
  1034. int f0_length = GetSamplesForHarvest(fs, x_length, frame_period);
  1035. for (int i = 0; i < f0_length; ++i) {
  1036. temporal_positions[i] = i * frame_period / 1000.0;
  1037. f0[i] = 0.0;
  1038. }
  1039. int overlap_parameter = 7;
  1040. int max_candidates =
  1041. matlab_round(number_of_channels / 10.0) * overlap_parameter;
  1042. double **f0_candidates = new double *[f0_length];
  1043. double **f0_candidates_score = new double *[f0_length];
  1044. for (int i = 0; i < f0_length; ++i) {
  1045. f0_candidates[i] = new double[max_candidates];
  1046. f0_candidates_score[i] = new double[max_candidates];
  1047. }
  1048. int number_of_candidates = HarvestGeneralBodySub(boundary_f0_list,
  1049. number_of_channels, f0_length, actual_fs, y_length, temporal_positions,
  1050. y_spectrum, fft_size, f0_floor, f0_ceil, max_candidates, f0_candidates) *
  1051. overlap_parameter;
  1052. RefineF0Candidates(y, y_length, actual_fs, temporal_positions, f0_length,
  1053. number_of_candidates, f0_floor, f0_ceil, f0_candidates,
  1054. f0_candidates_score);
  1055. RemoveUnreliableCandidates(f0_length, number_of_candidates,
  1056. f0_candidates, f0_candidates_score);
  1057. double *best_f0_contour = new double[f0_length];
  1058. FixF0Contour(f0_candidates, f0_candidates_score, f0_length,
  1059. number_of_candidates, best_f0_contour);
  1060. SmoothF0Contour(best_f0_contour, f0_length, f0);
  1061. delete[] y;
  1062. delete[] best_f0_contour;
  1063. delete[] y_spectrum;
  1064. for (int i = 0; i < f0_length; ++i) {
  1065. delete[] f0_candidates[i];
  1066. delete[] f0_candidates_score[i];
  1067. }
  1068. delete[] f0_candidates;
  1069. delete[] f0_candidates_score;
  1070. delete[] boundary_f0_list;
  1071. }
  1072. } // namespace
  1073. int GetSamplesForHarvest(int fs, int x_length, double frame_period) {
  1074. return static_cast<int>(1000.0 * x_length / fs / frame_period) + 1;
  1075. }
  1076. void Harvest(const double *x, int x_length, int fs,
  1077. const HarvestOption *option, double *temporal_positions, double *f0) {
  1078. // Several parameters will be controllable for debug.
  1079. double target_fs = 8000.0;
  1080. int dimension_ratio = matlab_round(fs / target_fs);
  1081. double channels_in_octave = 40;
  1082. if (option->frame_period == 1.0) {
  1083. HarvestGeneralBody(x, x_length, fs, 1, option->f0_floor,
  1084. option->f0_ceil, channels_in_octave, dimension_ratio,
  1085. temporal_positions, f0);
  1086. return;
  1087. }
  1088. int basic_frame_period = 1;
  1089. int basic_f0_length =
  1090. GetSamplesForHarvest(fs, x_length, basic_frame_period);
  1091. double *basic_f0 = new double[basic_f0_length];
  1092. double *basic_temporal_positions = new double[basic_f0_length];
  1093. HarvestGeneralBody(x, x_length, fs, basic_frame_period, option->f0_floor,
  1094. option->f0_ceil, channels_in_octave, dimension_ratio,
  1095. basic_temporal_positions, basic_f0);
  1096. int f0_length = GetSamplesForHarvest(fs, x_length, option->frame_period);
  1097. for (int i = 0; i < f0_length; ++i) {
  1098. temporal_positions[i] = i * option->frame_period / 1000.0;
  1099. f0[i] = basic_f0[MyMinInt(basic_f0_length - 1,
  1100. matlab_round(temporal_positions[i] * 1000.0))];
  1101. }
  1102. delete[] basic_f0;
  1103. delete[] basic_temporal_positions;
  1104. }
  1105. void InitializeHarvestOption(HarvestOption *option) {
  1106. // You can change default parameters.
  1107. option->f0_ceil = world::kCeilF0;
  1108. option->f0_floor = world::kFloorF0;
  1109. option->frame_period = 5;
  1110. }