SpeechSynthesizer_and_TextGrid.cpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
  1. /* SpeechSynthesizer_and_TextGrid.cpp
  2. *
  3. * Copyright (C) 2011-2017 David Weenink
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. djmw 20111214
  20. */
  21. #include "DTW.h"
  22. #include "Sounds_to_DTW.h"
  23. #include "Sound_extensions.h"
  24. #include "SpeechSynthesizer_and_TextGrid.h"
  25. #include "CCs_to_DTW.h"
  26. #include "DTW_and_TextGrid.h"
  27. #include "NUMmachar.h"
  28. // prototypes
  29. static void IntervalTier_splitInterval (IntervalTier me, double time, conststring32 leftLabel, integer interval, double precision);
  30. static autoIntervalTier IntervalTier_IntervalTier_cutPartsMatchingLabel (IntervalTier me, IntervalTier thee, conststring32 label, double precision);
  31. static autoIntervalTier IntervalTiers_patch_noBoundaries (IntervalTier me, IntervalTier thee, conststring32 patchLabel, double precision);
  32. static autoTable IntervalTiers_to_Table_textAlignmentment (IntervalTier target, IntervalTier source, EditCostsTable costs);
  33. static void IntervalTier_checkRange (IntervalTier me, integer startInterval, integer endinterval) {
  34. Melder_require (startInterval <= endinterval,
  35. U"The interval range end number should not be smaller than the interval range start number.");
  36. Melder_require (startInterval > 0,
  37. U"The specified interval range start number is ", startInterval, U", but should be at least 1.");
  38. Melder_require (endinterval <= my intervals.size,
  39. U"The specified interval range end number (", endinterval, U") exceeds the number of intervals (", my intervals.size, U") in this tier.");
  40. }
  41. autoSound SpeechSynthesizer_TextInterval_to_Sound (SpeechSynthesizer me, TextInterval thee, autoTextGrid *p_tg)
  42. {
  43. try {
  44. Melder_require (thy text && thy text[0] != U'\0', U"TextInterval should not be empty.");
  45. autoSound him = SpeechSynthesizer_to_Sound (me, thy text.get(), p_tg, nullptr);
  46. return him;
  47. } catch (MelderError) {
  48. Melder_throw (U"Sound not created from TextInterval.");
  49. }
  50. }
  51. autoSound SpeechSynthesizer_TextGrid_to_Sound (SpeechSynthesizer me, TextGrid thee, integer tierNumber, integer iinterval, autoTextGrid *p_tg) {
  52. try {
  53. TextGrid_checkSpecifiedTierNumberWithinRange (thee, tierNumber);
  54. IntervalTier intervalTier = (IntervalTier) thy tiers->at [tierNumber];
  55. Melder_require (intervalTier -> classInfo == classIntervalTier,
  56. U"Tier ", tierNumber, U" is not an interval tier.");
  57. Melder_require (iinterval > 0 && iinterval <= intervalTier -> intervals.size,
  58. U"Interval ", iinterval, U" does not exist on tier ", tierNumber, U".");
  59. return SpeechSynthesizer_TextInterval_to_Sound (me, intervalTier -> intervals.at [iinterval], p_tg);
  60. } catch (MelderError) {
  61. Melder_throw (U"Sound not created from textGrid.");
  62. }
  63. }
  #if 0
  /*
      Disabled helpers, kept for reference.
      They have been brought in line with the active code in this file:
      the interval text is passed as `text.get()`, and both functions
      compare labels with Melder_equ.
  */

  /*
      Return the start time of the first interval on tier `tierNumber` whose text equals `label`,
      or `undefined` when no interval carries that label.
  */
  static double TextGrid_getStartTimeOfFirstOccurence (TextGrid thee, integer tierNumber, conststring32 label) {
      TextGrid_checkSpecifiedTierNumberWithinRange (thee, tierNumber);
      IntervalTier intervalTier = (IntervalTier) thy tiers->at [tierNumber];
      if (intervalTier -> classInfo != classIntervalTier) {
          Melder_throw (U"Tier ", tierNumber, U" is not an interval tier.");
      }
      double start = undefined;
      for (integer iint = 1; iint <= intervalTier -> intervals.size; iint ++) {
          TextInterval ti = intervalTier -> intervals.at [iint];
          if (Melder_equ (ti -> text.get(), label)) {
              start = ti -> xmin;
              break;
          }
      }
      return start;
  }

  /*
      Return the end time of the last interval on tier `tierNumber` whose text equals `label`,
      or `undefined` when no interval carries that label.
  */
  static double TextGrid_getEndTimeOfLastOccurence (TextGrid thee, integer tierNumber, conststring32 label) {
      TextGrid_checkSpecifiedTierNumberWithinRange (thee, tierNumber);
      IntervalTier intervalTier = (IntervalTier) thy tiers->at [tierNumber];
      if (intervalTier -> classInfo != classIntervalTier) {
          Melder_throw (U"Tier ", tierNumber, U" is not an interval tier.");
      }
      double end = undefined;
      // search backwards so that the first hit is the last occurrence
      for (integer iint = intervalTier -> intervals.size; iint > 0; iint --) {
          TextInterval ti = intervalTier -> intervals.at [iint];
          if (Melder_equ (ti -> text.get(), label)) {
              end = ti -> xmax;
              break;
          }
      }
      return end;
  }
  #endif
  98. static void IntervalTier_getLabelInfo (IntervalTier me, conststring32 label, double *labelDurations, integer *numberOfOccurences) {
  99. *labelDurations = 0;
  100. *numberOfOccurences = 0;
  101. for (integer i = 1; i <= my intervals.size; i ++) {
  102. TextInterval ti = my intervals.at [i];
  103. if (Melder_equ (ti -> text.get(), label)) {
  104. *labelDurations += ti -> xmax - ti -> xmin;
  105. (*numberOfOccurences)++;
  106. }
  107. }
  108. }
  109. #define TIMES_ARE_CLOSE(x,y) (fabs((x)-(y)) < precision)
  110. void IntervalTier_splitInterval (IntervalTier me, double time, conststring32 leftLabel, integer interval, double precision) {
  111. try {
  112. Melder_assert (interval > 0);
  113. TextInterval ti = nullptr;
  114. integer index = 0;
  115. for (integer i = interval; i <= my intervals.size; i ++) {
  116. ti = my intervals.at [i];
  117. if (time < ti -> xmax + precision && time > ti -> xmin - precision) {
  118. index = i; break;
  119. }
  120. }
  121. // if index == 0 then search left intervals??
  122. if (index == 0 || TIMES_ARE_CLOSE(time, ti -> xmin) || TIMES_ARE_CLOSE(time, ti -> xmax)) {
  123. return;
  124. }
  125. autoTextInterval newInterval = TextInterval_create (ti -> xmin, time, leftLabel);
  126. // Make start of current and begin of new interval equal
  127. ti -> xmin = time;
  128. my intervals. addItem_move (newInterval.move());
  129. } catch (MelderError) {
  130. Melder_throw (U"Boundary not inserted.");
  131. }
  132. }
  133. static autoTextTier TextTier_IntervalTier_cutPartsMatchingLabel (TextTier me, IntervalTier thee, conststring32 label, double precision) {
  134. try {
  135. if (my xmin != thy xmin || my xmax != thy xmax) {
  136. Melder_throw (U"Domains should be equal.");
  137. }
  138. integer myIndex = 1;
  139. double timeCut = 0.0;
  140. autoTextTier him = TextTier_create (0.0, my xmax - my xmin);
  141. for (integer j = 1; j <= thy intervals.size; j ++) {
  142. TextInterval cut = thy intervals.at [j];
  143. if (Melder_equ (cut -> text.get(), label)) {
  144. timeCut += cut -> xmax - cut -> xmin;
  145. } else {
  146. while (myIndex <= my points.size) {
  147. TextPoint tp = my points.at [myIndex];
  148. if (tp -> number < cut -> xmin - precision) {
  149. // point is left of cut
  150. myIndex++;
  151. } else if (tp -> number < cut -> xmax + precision) {
  152. // point is in (no)cut
  153. double time = tp -> number - my xmin - timeCut;
  154. TextTier_addPoint (him.get(), time, tp -> mark.get());
  155. myIndex++;
  156. } else {
  157. break;
  158. }
  159. }
  160. }
  161. }
  162. his xmax -= timeCut;
  163. return him;
  164. } catch (MelderError) {
  165. Melder_throw (me, U": parts not cut.");
  166. }
  167. }
  168. // Cut parts from me marked by labels in thee
  169. autoIntervalTier IntervalTier_IntervalTier_cutPartsMatchingLabel (IntervalTier me, IntervalTier thee, conststring32 label, double precision) {
  170. try {
  171. if (my xmin != thy xmin || my xmax != thy xmax) {
  172. Melder_throw (U"Domains should be equal.");
  173. }
  174. autoNUMvector<double> durations (1, my intervals.size);
  175. for (integer i = 1; i <= my intervals.size; i ++) {
  176. TextInterval ti = my intervals.at [i];
  177. durations[i] = ti -> xmax - ti -> xmin;
  178. }
  179. integer myInterval = 1;
  180. for (integer j = 1; j <= thy intervals.size; j ++) {
  181. TextInterval cut = thy intervals.at [j];
  182. if (Melder_equ (cut -> text.get(), label)) { // trim
  183. while (myInterval <= my intervals.size) {
  184. TextInterval ti = my intervals.at [myInterval];
  185. if (ti -> xmin > cut -> xmin - precision && ti -> xmax < cut -> xmax + precision) {
  186. // 1. interval completely within cut
  187. durations[myInterval] = 0;
  188. myInterval++;
  189. } else if (ti -> xmin < cut -> xmin + precision && cut -> xmin < ti -> xmax + precision) {
  190. // 2. cut start is within interval
  191. if (cut -> xmax > ti -> xmax - precision) {
  192. // interval end is in cut, interval start before
  193. durations[myInterval] -= ti -> xmax - cut -> xmin;
  194. myInterval++;
  195. } else {
  196. // 3. cut completely within interval
  197. durations[myInterval] -= cut -> xmax - cut -> xmin;
  198. break;
  199. }
  200. } else if (cut -> xmax > ti -> xmin - precision && cut -> xmin < ti -> xmax + precision) {
  201. // +1+2 : cut end is within interval, cut start before
  202. durations[myInterval] -= cut -> xmax - ti -> xmin;
  203. break;
  204. } else if (ti -> xmax < cut -> xmin + precision) {
  205. myInterval++;
  206. }
  207. }
  208. }
  209. }
  210. double totalDuration = 0;
  211. for (integer i = 1; i <= my intervals.size; i ++) {
  212. if (durations[i] < precision) {
  213. durations[i] = 0;
  214. }
  215. totalDuration += durations[i];
  216. }
  217. autoIntervalTier him = IntervalTier_create (0, totalDuration);
  218. double time = 0; integer hisInterval = 1;
  219. for (integer i = 1; i <= my intervals.size; i ++) {
  220. if (durations[i] <= 0) continue;
  221. TextInterval ti = my intervals.at [i];
  222. time += durations[i];
  223. if (fabs (time - totalDuration) > precision) {
  224. IntervalTier_splitInterval (him.get(), time, ti -> text.get(), hisInterval, precision);
  225. hisInterval++;
  226. } else { // last interval
  227. TextInterval histi = his intervals.at [hisInterval];
  228. TextInterval_setText (histi, ti -> text.get());
  229. }
  230. }
  231. return him;
  232. } catch (MelderError) {
  233. Melder_throw (me, U": parts not cut.");
  234. }
  235. }
  236. autoTextGrid TextGrid_IntervalTier_cutPartsMatchingLabel (TextGrid me, IntervalTier thee, conststring32 label, double precision) {
  237. try {
  238. if (my xmin != thy xmin || my xmax != thy xmax) {
  239. Melder_throw (U"Domains should be equal.");
  240. }
  241. double cutDurations = 0;
  242. for (integer i = 1; i <= thy intervals.size; i ++) {
  243. TextInterval cut = thy intervals.at [i];
  244. if (Melder_equ (cut -> text.get(), label)) {
  245. cutDurations += cut -> xmax - cut -> xmin;
  246. }
  247. }
  248. if (cutDurations <= precision) { // Nothing to patch
  249. return Data_copy (me);
  250. }
  251. autoTextGrid him = TextGrid_createWithoutTiers (0, thy xmax - thy xmin - cutDurations);
  252. for (integer itier = 1; itier <= my tiers->size; itier ++) {
  253. Function anyTier = my tiers->at [itier];
  254. if (anyTier -> classInfo == classIntervalTier) {
  255. autoIntervalTier newTier = IntervalTier_IntervalTier_cutPartsMatchingLabel ((IntervalTier) anyTier, thee, label, precision);
  256. his tiers -> addItem_move (newTier.move());
  257. } else {
  258. autoTextTier newTier = TextTier_IntervalTier_cutPartsMatchingLabel ((TextTier) anyTier, thee, label, precision);
  259. his tiers -> addItem_move (newTier.move());
  260. }
  261. }
  262. return him;
  263. } catch (MelderError) {
  264. Melder_throw (me, U": no parts cut.");
  265. }
  266. }
  267. // Patch thy intervals that match patchLabel into my intervals
  268. // The resulting IntervalTier has thy xmin as starting time and thy xmax as end time
  269. autoIntervalTier IntervalTiers_patch_noBoundaries (IntervalTier me, IntervalTier thee, conststring32 patchLabel, double precision) {
  270. try {
  271. autoNUMvector <double> durations ((integer) 0, my intervals.size + 1);
  272. for (integer i = 1; i <= my intervals.size; i ++) {
  273. TextInterval myti = my intervals.at [i];
  274. durations [i] = myti -> xmax - myti -> xmin;
  275. }
  276. integer myInterval = 1;
  277. double xShift = thy xmin - my xmin;
  278. for (integer j = 1; j <= thy intervals.size; j ++) {
  279. TextInterval patch = thy intervals.at [j];
  280. if (Melder_equ (patch -> text.get(), patchLabel)) {
  281. if (j == 1) {
  282. xShift += durations[0] = patch -> xmax - patch -> xmin;
  283. } else if (j == thy intervals.size) {
  284. durations [my intervals.size + 1] = patch -> xmax - patch -> xmin;
  285. } else {
  286. while (myInterval <= my intervals.size) {
  287. TextInterval ti = my intervals.at [myInterval];
  288. double tixmin = ti -> xmin + xShift;
  289. double tixmax = ti -> xmax + xShift;
  290. if ((patch -> xmin > tixmin - precision) && (patch -> xmin < tixmax + precision)) {
  291. durations[myInterval] += patch -> xmax - patch -> xmin;
  292. break;
  293. }
  294. myInterval++;
  295. }
  296. }
  297. } else {
  298. while (myInterval <= my intervals.size) {
  299. TextInterval ti = my intervals.at [myInterval];
  300. double tixmax = ti -> xmax + xShift;
  301. if (tixmax < patch -> xmin + precision) {
  302. myInterval++;
  303. } else {
  304. break;
  305. }
  306. }
  307. }
  308. }
  309. autoIntervalTier him = IntervalTier_create (thy xmin, thy xmax);
  310. // first interval
  311. double time = thy xmin + durations[0];
  312. integer hisInterval = 1;
  313. if (durations [0] > 0) {
  314. IntervalTier_splitInterval (him.get(), time , U"", hisInterval, precision);
  315. hisInterval++;
  316. }
  317. for (integer i = 1; i <= my intervals.size; i ++) {
  318. TextInterval ti = my intervals.at [i];
  319. time += durations [i];
  320. IntervalTier_splitInterval (him.get(), time, ti -> text.get(), hisInterval, precision);
  321. hisInterval++;
  322. }
  323. if (durations [my intervals.size + 1] > 0) {
  324. time += durations [my intervals.size + 1];
  325. IntervalTier_splitInterval (him.get(), time , U"", hisInterval, precision);
  326. }
  327. return him;
  328. } catch (MelderError) {
  329. Melder_throw (me, U": not patched.");
  330. }
  331. }
  #if 0
  /*
      Disabled earlier version of the patching of interval tiers; it does create boundaries
      around the patches. The active code uses IntervalTiers_patch_noBoundaries instead.
      Brought in line with the active code: interval texts are passed as `text.get()`,
      and `myti` starts out as a null pointer instead of being left without a value.
  */
  static autoIntervalTier IntervalTiers_patch (IntervalTier me, IntervalTier thee, conststring32 patchLabel, double precision) {
      try {
          autoIntervalTier him = IntervalTier_create (thy xmin, thy xmax);
          integer myInterval = 1, hisInterval = 1;
          double xmax = thy xmin;
          for (integer i = 1; i <= thy intervals.size; i ++) {
              TextInterval myti = nullptr, ti = thy intervals.at [i];
              if (Melder_equ (ti -> text.get(), patchLabel)) {
                  bool splitInterval = false; double endtime, split = 0;
                  // NOTE(review): `i` starts at 1, so this condition always holds; perhaps `i > 1` was meant
                  if (i > 0) {
                      while (myInterval <= my intervals.size) {
                          myti = my intervals.at [myInterval];
                          endtime = xmax + myti -> xmax - myti -> xmin;
                          if (endtime <= ti -> xmin + precision) {
                              xmax = endtime;
                              IntervalTier_splitInterval (him.get(), xmax, myti -> text.get(), hisInterval, precision);
                              hisInterval ++;
                          } else {
                              if (xmax < ti -> xmin - precision) { // split interval ???
                                  splitInterval = true;
                                  xmax = ti -> xmin;
                                  split = endtime - xmax;
                                  IntervalTier_splitInterval (him.get(), xmax, myti -> text.get(), hisInterval, precision);
                                  hisInterval ++; myInterval ++;
                              }
                              break;
                          }
                          myInterval ++;
                      }
                  }
                  xmax += ti -> xmax - ti -> xmin;
                  IntervalTier_splitInterval (him.get(), xmax, U"", hisInterval, precision);
                  hisInterval ++;
                  if (splitInterval) {
                      // the second part of my interval that was split by the patch
                      xmax += split;
                      IntervalTier_splitInterval (him.get(), xmax, myti -> text.get(), hisInterval, precision);
                      hisInterval ++;
                  }
              } else if (i == thy intervals.size) { // copy remaining if last interval doesn't match
                  while (myInterval <= my intervals.size) {
                      myti = my intervals.at [myInterval];
                      xmax += myti -> xmax - myti -> xmin;
                      IntervalTier_splitInterval (him.get(), xmax, myti -> text.get(), hisInterval, precision);
                      hisInterval ++;
                      myInterval ++;
                  }
              }
          }
          return him;
      } catch (MelderError) {
          Melder_throw (me, U": not patched.");
      }
  }
  #endif
  387. static autoTextTier TextTier_IntervalTier_patch (TextTier me, IntervalTier thee, conststring32 patchLabel, double precision) {
  388. try {
  389. integer myIndex = 1;
  390. autoTextTier him = TextTier_create (thy xmin, thy xmax);
  391. double xShift = thy xmin - my xmin;
  392. for (integer i = 1; i <= thy intervals.size; i ++) {
  393. TextInterval ti = thy intervals.at [i];
  394. if (Melder_equ (ti -> text.get(), patchLabel)) {
  395. if (i > 1) {
  396. while (myIndex <= my points.size) {
  397. TextPoint tp = my points.at [myIndex];
  398. double time = tp -> number + xShift;
  399. if (time < ti -> xmin + precision) {
  400. autoTextPoint newPoint = TextPoint_create (time, tp -> mark.get());
  401. his points. addItem_move (newPoint.move());
  402. } else {
  403. break;
  404. }
  405. myIndex++;
  406. }
  407. }
  408. xShift += ti -> xmax - ti -> xmin;
  409. } else if (i == thy intervals.size) {
  410. while (myIndex <= my points.size) {
  411. TextPoint tp = my points.at [myIndex];
  412. double time = tp -> number + xShift;
  413. if (time < ti -> xmin + precision) {
  414. autoTextPoint newPoint = TextPoint_create (time, tp -> mark.get());
  415. his points. addItem_move (newPoint.move());
  416. }
  417. myIndex++;
  418. }
  419. }
  420. }
  421. return him;
  422. } catch (MelderError) {
  423. Melder_throw (me, U": cannot patch TextTier.");
  424. }
  425. }
  426. autoTextGrid TextGrid_IntervalTier_patch (TextGrid me, IntervalTier thee, conststring32 patchLabel, double precision) {
  427. try {
  428. double patchDurations;
  429. integer numberOfPatches;
  430. IntervalTier_getLabelInfo (thee, patchLabel, &patchDurations, &numberOfPatches);
  431. if (patchDurations <= 0 || my xmax - my xmin >= thy xmax - thy xmin ) { // Nothing to patch
  432. return Data_copy (me);
  433. }
  434. autoTextGrid him = TextGrid_createWithoutTiers (thy xmin, thy xmax);
  435. for (integer itier = 1; itier <= my tiers->size; itier ++) {
  436. Function anyTier = my tiers->at [itier];
  437. if (anyTier -> classInfo == classIntervalTier) {
  438. // autoIntervalTier ait = IntervalTiers_patch ((IntervalTier) anyTier, thee, patchLabel, precision);
  439. autoIntervalTier newTier = IntervalTiers_patch_noBoundaries ((IntervalTier) anyTier, thee, patchLabel, precision);
  440. his tiers -> addItem_move (newTier.move());
  441. } else {
  442. autoTextTier newTier = TextTier_IntervalTier_patch ((TextTier) anyTier, thee, patchLabel, precision);
  443. his tiers -> addItem_move (newTier.move());
  444. }
  445. }
  446. return him;
  447. } catch (MelderError) {
  448. Melder_throw (me, U": not patched.");
  449. }
  450. }
  451. // We assume that the Sound and the SpeechSynthesizer have the same samplingFrequency
  452. autoTextGrid SpeechSynthesizer_Sound_TextInterval_align (SpeechSynthesizer me, Sound thee, TextInterval him, double silenceThreshold, double minSilenceDuration, double minSoundingDuration) {
  453. try {
  454. Melder_require (thy xmin == his xmin && thy xmax == his xmax,
  455. U"Domains of Sound and TextGrid should be equal.");
  456. Melder_require (fabs (1.0 / thy dx - my d_samplingFrequency) < 1e-9,
  457. U"The sampling frequencies of the SpeechSynthesizer and the Sound should be equal.");
  458. autostring32vector tokens = STRVECtokenize (his text.get());
  459. integer numberOfTokens = tokens.size;
  460. Melder_require (numberOfTokens > 0, U"The interval should have text.");
  461. /*
  462. Remove silent intervals from start and end of sounds because
  463. 1. it will improve the word rate guess
  464. 2. it will improve the DTW matching.
  465. */
  466. double minPitch = 200.0, timeStep = 0.005, precision = thy dx;
  467. double startTimeOfSounding, endTimeOfSounding;
  468. autoSound soundTrimmed = Sound_trimSilencesAtStartAndEnd (thee, 0.0, minPitch, timeStep, silenceThreshold, minSilenceDuration, minSoundingDuration, & startTimeOfSounding, & endTimeOfSounding);
  469. double duration_soundTrimmed = soundTrimmed -> xmax - soundTrimmed -> xmin;
  470. bool hasSilence_sound = fabs (startTimeOfSounding - thy xmin) > precision || fabs (endTimeOfSounding - thy xmax) > precision;
  471. if (my d_estimateSpeechRate) {
  472. // estimate speaking rate with the number of words per minute from the text
  473. double wordsPerMinute_rawTokens = 60.0 * numberOfTokens / duration_soundTrimmed;
  474. // compensation for long words: 5 characters / word
  475. double wordsPerMinute_rawText = 60.0 * (str32len (his text.get()) / 5.0) / duration_soundTrimmed;
  476. my d_wordsPerMinute = Melder_ifloor (0.5 * (wordsPerMinute_rawTokens + wordsPerMinute_rawText));
  477. }
  478. autoTextGrid textgrid_synth, textgrid_synth_sounding;
  479. autoSound synth = SpeechSynthesizer_TextInterval_to_Sound (me, him, & textgrid_synth);
  480. /*
  481. For the synthesizer the silence threshold has to be < -30 dB, otherwise fricatives will not
  482. be found as sounding! This is ok since silences are almost at zero amplitudes for synthesized sounds.
  483. We also have to decrease the minimum silence and minimum sounding duration to catch, for example,
  484. the final plosive "t" from the synthesized sound "text".
  485. */
  486. double silenceThreshold_synth = -40.0, minSilenceDuration_synth = 0.05, minSoundingDuration_synth = 0.05;
  487. double startTimeOfSounding_synth, endTimeOfSounding_synth;
  488. autoSound synthTrimmed = Sound_trimSilencesAtStartAndEnd (synth.get(), 0.0, minPitch, timeStep, silenceThreshold_synth,
  489. minSilenceDuration_synth, minSoundingDuration_synth, & startTimeOfSounding_synth, & endTimeOfSounding_synth);
  490. double synthTrimmed_duration = synthTrimmed -> xmax - synthTrimmed -> xmin;
  491. bool hasSilence_synth = fabs (startTimeOfSounding_synth - synth -> xmin) > precision ||
  492. fabs (endTimeOfSounding_synth - synth -> xmax) > precision;
  493. if (hasSilence_synth) textgrid_synth_sounding =
  494. TextGrid_extractPart (textgrid_synth.get(), startTimeOfSounding_synth, endTimeOfSounding_synth, true);
  495. // compare the durations of the two sounds to get an indication of the slope constraint needed for the DTW
  496. double slope = duration_soundTrimmed / synthTrimmed_duration;
  497. slope = (slope > 1.0 ? slope : 1.0 / slope);
  498. int constraint = (slope < 1.5 ? 4 : slope < 2.0 ? 3 : slope < 3.0 ? 2 : 1); // TODO enums
  499. double analysisWidth = 0.02, dt = 0.005, band = 0.0;
  500. autoDTW dtw = Sounds_to_DTW ((hasSilence_sound ? soundTrimmed.get() : thee),
  501. (hasSilence_synth ? synthTrimmed.get() : synth.get()), analysisWidth, dt, band, constraint);
  502. autoTextGrid result = DTW_TextGrid_to_TextGrid (dtw.get(), (hasSilence_synth ? textgrid_synth_sounding.get() : textgrid_synth.get()), precision);
  503. if (hasSilence_sound) {
  504. if (startTimeOfSounding > thy xmin)
  505. TextGrid_setEarlierStartTime (result.get(), thy xmin, U"", U"");
  506. if (endTimeOfSounding < thy xmax || result -> xmax < thy xmax)
  507. TextGrid_setLaterEndTime (result.get(), thy xmax, U"", U"");
  508. }
  509. return result;
  510. } catch (MelderError) {
  511. Melder_throw (U"Sound and TextInterval not aligned.");
  512. }
  513. }
  514. /*
  515. typedef struct structAlignmentOfSoundAndTextStruct {
  516. double windowLength, timeStep; // analysis
  517. double f1_mel, fmax_mel, df_mel; // MelFilter
  518. integer numberOfMFCCCoefficients; // MFCC
  519. double dtw_cepstralWeight, dtw_logEnergyWeight; // MFCC -> DTW
  520. double dtw_regressionWeight, dtw_regressionlogEnergyWeight;
  521. double dtw_regressionWindowLength;
  522. double dtw_sakoeChibaBand, dtw_constraint;
  523. double silenceThreshold, minSilenceDuration, minSoundingDuration, trimDuration; // silence detection
  524. integer language, voicevariant, pitchAdjustment, pitchRange, wordsPerMinute; // synthesizer
  525. bool interpretPhonemeCodes, ipa, set_wordsPerMinute;
  526. double wordgap; // synthesizer
  527. } *SpeechSynthesizer_alignmentStruct;*/
  528. static autoTextGrid SpeechSynthesizer_Sound_TextInterval_align2 (SpeechSynthesizer me, Sound thee, TextInterval him, double silenceThreshold, double minSilenceDuration, double minSoundingDuration, double trimDuration) {
  529. try {
  530. Melder_require (thy xmin == his xmin && thy xmax == his xmax,
  531. U"Domains of Sound and TextGrid should be equal.");
  532. Melder_require (fabs (1.0 / thy dx - my d_samplingFrequency) < 1e-9,
  533. U"The sampling frequencies of the SpeechSynthesizer and the Sound should be equal.");
  534. conststring32 trimLabel = U"trim";
  535. // 1. trim the silences of the sound
  536. /*
  537. * For the synthesizer the silence threshold has to be < -30 dB, otherwise fricatives will not
  538. * be found as sounding! This is ok since silences are almost at zero amplitudes
  539. * We also have to decrease the minimum silence and minimum sounding duration to catch, for example,
  540. * the final plosive "t" from the word "text"
  541. *
  542. */
  543. double minPitch = 200, timeStep = 0.005, precision = thy dx;
  544. autoTextGrid thee_trimmer;
  545. autoSound thee_trimmed = Sound_trimSilences (thee, trimDuration, false, minPitch, timeStep, silenceThreshold, minSilenceDuration, minSoundingDuration, &thee_trimmer, trimLabel);
  546. // 2. synthesize the sound from the TextInterval
  547. autoTextGrid tg_syn;
  548. autoSound synth = SpeechSynthesizer_TextInterval_to_Sound (me, him, &tg_syn);
  549. // 3. There should be no silences in the synthesized sound except at the start and finish.
  550. // Set the wordwarp parameter to a small value like 0.001 s.
  551. // 4. Get DTW from the two sounds
  552. double analysisWidth = 0.02, dt = 0.005, band = 0.0;
  553. int constraint = 4;
  554. autoDTW dtw = Sounds_to_DTW (thee_trimmed.get(), synth.get(), analysisWidth, dt, band, constraint);
  555. // 6. Warp the synthesis TextGrid
  556. // first make domains equal, otherwsise the warper protests
  557. autoTextGrid warp = DTW_TextGrid_to_TextGrid (dtw.get(), tg_syn.get(), precision);
  558. // 7. Patch the trimmed intervals back into the warped TextGrid
  559. autoTextGrid result = TextGrid_IntervalTier_patch (warp.get(), (IntervalTier) thee_trimmer -> tiers->at [1], U"trim", 2 * thy dx);
  560. return result;
  561. } catch (MelderError) {
  562. Melder_throw (thee, U": sound and TextInterval not aligned.");
  563. }
  564. }
  565. autoTextGrid SpeechSynthesizer_Sound_IntervalTier_align (SpeechSynthesizer me, Sound thee, IntervalTier him, integer istart, integer iend, double silenceThreshold, double minSilenceDuration, double minSoundingDuration) {
  566. try {
  567. IntervalTier_checkRange (him, istart, iend);
  568. TextInterval tib = his intervals.at [istart];
  569. TextInterval tie = his intervals.at [iend];
  570. Melder_require (tib -> xmin >= thy xmin && tie -> xmax <= thy xmax,
  571. U"The chosen interval(s) must lie within the sound.");
  572. OrderedOf<structTextGrid> textgrids;
  573. autoTextGrid result = TextGrid_create (tib -> xmin, tie -> xmax, U"sentence clause word phoneme", U"");
  574. for (integer iint = istart; iint <= iend; iint ++) {
  575. TextInterval ti = his intervals.at [iint];
  576. if (ti -> text && ti -> text [0] != U'\0') {
  577. autoSound sound = Sound_extractPart (thee, ti -> xmin, ti -> xmax, kSound_windowShape::RECTANGULAR, 1, true);
  578. autoTextGrid grid = SpeechSynthesizer_Sound_TextInterval_align (me, sound.get(), ti, silenceThreshold, minSilenceDuration, minSoundingDuration);
  579. textgrids. addItem_move (grid.move());
  580. }
  581. }
  582. Melder_require (textgrids.size > 0, U"Nothing could be aligned. Was your IntervalTier empty?");
  583. autoTextGrid aligned = TextGrids_to_TextGrid_appendContinuous (& textgrids, true);
  584. return aligned;
  585. } catch (MelderError) {
  586. Melder_throw (U"No aligned TextGrid created.");
  587. }
  588. }
  589. static autoTextGrid SpeechSynthesizer_Sound_IntervalTier_align2 (SpeechSynthesizer me, Sound thee, IntervalTier him, integer istart, integer iend, double silenceThreshold, double minSilenceDuration, double minSoundingDuration, double trimDuration) {
  590. try {
  591. IntervalTier_checkRange (him, istart, iend);
  592. TextInterval tb = his intervals.at [istart];
  593. TextInterval te = his intervals.at [iend];
  594. autoTextGrid result = TextGrid_create (tb -> xmin, te -> xmax, U"sentence clause word phoneme", U"");
  595. OrderedOf<structTextGrid> textgrids;
  596. for (integer iint = istart; iint <= iend; iint ++) {
  597. TextInterval ti = his intervals.at [iint];
  598. if (ti -> text && ti -> text [0] != U'\0') {
  599. autoSound sound = Sound_extractPart (thee, ti -> xmin, ti -> xmax, kSound_windowShape::RECTANGULAR, 1, true);
  600. autoTextGrid grid = SpeechSynthesizer_Sound_TextInterval_align2 (me, sound.get(), ti, silenceThreshold, minSilenceDuration, minSoundingDuration, trimDuration);
  601. textgrids. addItem_move (grid.move());
  602. }
  603. }
  604. Melder_require (textgrids.size > 0, U"Nothing could be aligned. Was your IntervalTier empty?");
  605. autoTextGrid aligned = TextGrids_to_TextGrid_appendContinuous (& textgrids, true);
  606. return aligned;
  607. } catch (MelderError) {
  608. Melder_throw (U"No aligned TextGrid created.");
  609. }
  610. }
  611. autoTextGrid SpeechSynthesizer_Sound_TextGrid_align (SpeechSynthesizer me, Sound thee, TextGrid him, integer tierNumber, integer istart, integer iend, double silenceThreshold, double minSilenceDuration, double minSoundingDuration) {
  612. try {
  613. Melder_require (thy xmin == his xmin && thy xmax == his xmax,
  614. U"The domains of the Sound and the TextGrid must be equal.");
  615. IntervalTier tier = TextGrid_checkSpecifiedTierIsIntervalTier (him, tierNumber);
  616. autoTextGrid grid = SpeechSynthesizer_Sound_IntervalTier_align (me, thee, tier, istart, iend, silenceThreshold, minSilenceDuration, minSoundingDuration);
  617. return grid;
  618. } catch (MelderError) {
  619. Melder_throw (me, U", ", thee, U", ", him, U": Cannot align.");
  620. }
  621. }
  622. autoTextGrid SpeechSynthesizer_Sound_TextGrid_align2 (SpeechSynthesizer me, Sound thee, TextGrid him, integer tierNumber, integer istart, integer iend, double silenceThreshold, double minSilenceDuration, double minSoundingDuration, double trimDuration) {
  623. try {//TODO: check not empty tier
  624. IntervalTier tier = TextGrid_checkSpecifiedTierIsIntervalTier (him, tierNumber);
  625. autoTextGrid grid = SpeechSynthesizer_Sound_IntervalTier_align2 (me, thee, tier, istart, iend, silenceThreshold, minSilenceDuration, minSoundingDuration, trimDuration);
  626. return grid;
  627. } catch (MelderError) {
  628. Melder_throw (U"");
  629. }
  630. }
  631. static autoStrings IntervalTier_to_Strings_withOriginData (IntervalTier me, integer *from) {
  632. try {
  633. autoStrings thee = Thing_new (Strings);
  634. thy strings = autostring32vector (my intervals.size);
  635. for (integer i = 1; i <= my intervals.size; i ++) {
  636. TextInterval ti = my intervals.at [i];
  637. if (ti -> text && ti -> text [0] != U'\0') {
  638. thy strings [++ thy numberOfStrings] = Melder_dup (ti -> text.get());
  639. if (from) {
  640. from [thy numberOfStrings] = i;
  641. }
  642. }
  643. }
  644. return thee;
  645. } catch (MelderError) {
  646. Melder_throw (me, U": no Strings created.");
  647. }
  648. }
  649. autoTable IntervalTiers_to_Table_textAlignmentment (IntervalTier target, IntervalTier source, EditCostsTable costs) {
  650. try {
  651. integer numberOfTargetIntervals = target -> intervals.size;
  652. integer numberOfSourceIntervals = source -> intervals.size;
  653. autoNUMvector<integer> targetOrigin (1, numberOfTargetIntervals);
  654. autoNUMvector<integer> sourceOrigin (1, numberOfSourceIntervals);
  655. autoStrings targets = IntervalTier_to_Strings_withOriginData (target, targetOrigin.peek());
  656. autoStrings sources = IntervalTier_to_Strings_withOriginData (source, sourceOrigin.peek());
  657. autoEditDistanceTable edit = EditDistanceTable_create (targets.get(), sources.get());
  658. if (costs != 0) {
  659. EditDistanceTable_setEditCosts (edit.get(), costs);
  660. EditDistanceTable_findPath (edit.get(), nullptr);
  661. }
  662. integer pathLength = edit -> warpingPath -> pathLength;
  663. autoTable thee = Table_createWithColumnNames (pathLength - 1, U"targetInterval targetText targetStart targetEnd sourceInterval sourceText sourceStart sourceEnd operation");
  664. for (integer i = 2; i <= pathLength; i++) {
  665. structPairOfInteger p = edit -> warpingPath -> path[i];
  666. structPairOfInteger p1 = edit -> warpingPath -> path[i - 1];
  667. double targetStart = undefined, targetEnd = undefined;
  668. double sourceStart = undefined, sourceEnd = undefined;
  669. conststring32 targetText = U"", sourceText = U"";
  670. integer targetInterval = p.y > 1 ? targetOrigin[p.y - 1] : 0;
  671. integer sourceInterval = p.x > 1 ? sourceOrigin[p.x - 1] : 0;
  672. if (targetInterval > 0) {
  673. TextInterval ti = target -> intervals.at [targetInterval];
  674. targetStart = ti -> xmin;
  675. targetEnd = ti -> xmax;
  676. targetText = ti -> text.get();
  677. }
  678. if (sourceInterval > 0) {
  679. TextInterval ti = source -> intervals.at [sourceInterval];
  680. sourceStart = ti -> xmin;
  681. sourceEnd = ti -> xmax;
  682. sourceText = ti -> text.get();
  683. }
  684. integer irow = i - 1;
  685. if (p.y == p1.y) { // deletion
  686. Table_setNumericValue (thee.get(), irow, 1, 0);
  687. Table_setStringValue (thee.get(), irow, 2, U"");
  688. Table_setNumericValue (thee.get(), irow, 3, undefined);
  689. Table_setNumericValue (thee.get(), irow, 4, undefined);
  690. Table_setNumericValue (thee.get(), irow, 5, sourceInterval);
  691. Table_setStringValue (thee.get(), irow, 6, sourceText);
  692. Table_setNumericValue (thee.get(), irow, 7, sourceStart);
  693. Table_setNumericValue (thee.get(), irow, 8, sourceEnd);
  694. Table_setStringValue (thee.get(), irow, 9, U"d");
  695. } else if (p.x == p1.x) { // insertion
  696. Table_setNumericValue (thee.get(), irow, 1, targetInterval);
  697. Table_setStringValue (thee.get(), irow, 2, targetText);
  698. Table_setNumericValue (thee.get(), irow, 3, targetStart);
  699. Table_setNumericValue (thee.get(), irow, 4, targetEnd);
  700. Table_setNumericValue (thee.get(), irow, 5, 0);
  701. Table_setStringValue (thee.get(), irow, 6, U"");
  702. Table_setNumericValue (thee.get(), irow, 7, undefined);
  703. Table_setNumericValue (thee.get(), irow, 8, undefined);
  704. Table_setStringValue (thee.get(), irow, 9, U"i");
  705. } else { // substitution ?
  706. Table_setNumericValue (thee.get(), irow, 1, targetInterval);
  707. Table_setStringValue (thee.get(), irow, 2, targetText);
  708. Table_setNumericValue (thee.get(), irow, 3, targetStart);
  709. Table_setNumericValue (thee.get(), irow, 4, targetEnd);
  710. Table_setNumericValue (thee.get(), irow, 5, sourceInterval);
  711. Table_setStringValue (thee.get(), irow, 6, sourceText);
  712. Table_setNumericValue (thee.get(), irow, 7, sourceStart);
  713. Table_setNumericValue (thee.get(), irow, 8, sourceEnd);
  714. Table_setStringValue (thee.get(), irow, 9, Melder_equ (targetText, sourceText) ? U" " : U"s");
  715. }
  716. }
  717. return thee;
  718. } catch (MelderError) {
  719. Melder_throw (target, U" and ", source, U" not aligned.");
  720. }
  721. }
  722. autoTable TextGrids_to_Table_textAlignmentment (TextGrid target, integer ttier, TextGrid source, integer stier, EditCostsTable costs) {
  723. try {
  724. IntervalTier targetTier = TextGrid_checkSpecifiedTierIsIntervalTier (target, ttier);
  725. IntervalTier sourceTier = TextGrid_checkSpecifiedTierIsIntervalTier (source, stier);
  726. return IntervalTiers_to_Table_textAlignmentment (targetTier, sourceTier, costs);
  727. } catch (MelderError) {
  728. Melder_throw (U"No text alignment table created from TextGrids ", target, U" and ", source, U".");
  729. }
  730. }
  // End of file SpeechSynthesizer_and_TextGrid.cpp