TDStretch.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088
  1. ///////////////////////////////////////////////////////////////////////////////
  2. ///
  3. /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
  4. /// while maintaining the original pitch by using a time domain WSOLA-like
  5. /// method with several performance-increasing tweaks.
  6. ///
  7. /// Notes : MMX optimized functions reside in a separate, platform-specific
  8. /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'.
  9. ///
  10. /// This source file contains OpenMP optimizations that allow speeding up the
  11. /// corss-correlation algorithm by executing it in several threads / CPU cores
  12. /// in parallel. See the following article link for more detailed discussion
  13. /// about SoundTouch OpenMP optimizations:
  14. /// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
  15. ///
  16. /// Author : Copyright (c) Olli Parviainen
  17. /// Author e-mail : oparviai 'at' iki.fi
  18. /// SoundTouch WWW: http://www.surina.net/soundtouch
  19. ///
  20. ////////////////////////////////////////////////////////////////////////////////
  21. //
  22. // License :
  23. //
  24. // SoundTouch audio processing library
  25. // Copyright (c) Olli Parviainen
  26. //
  27. // This library is free software; you can redistribute it and/or
  28. // modify it under the terms of the GNU Lesser General Public
  29. // License as published by the Free Software Foundation; either
  30. // version 2.1 of the License, or (at your option) any later version.
  31. //
  32. // This library is distributed in the hope that it will be useful,
  33. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  34. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  35. // Lesser General Public License for more details.
  36. //
  37. // You should have received a copy of the GNU Lesser General Public
  38. // License along with this library; if not, write to the Free Software
  39. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  40. //
  41. ////////////////////////////////////////////////////////////////////////////////
  42. #include <string.h>
  43. #include <limits.h>
  44. #include <assert.h>
  45. #include <math.h>
  46. #include <float.h>
  47. #include "STTypes.h"
  48. #include "cpu_detect.h"
  49. #include "TDStretch.h"
  50. using namespace soundtouch;
  51. #define max(x, y) (((x) > (y)) ? (x) : (y))
  52. /*****************************************************************************
  53. *
  54. * Implementation of the class 'TDStretch'
  55. *
  56. *****************************************************************************/
  57. TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
  58. {
  59. bQuickSeek = false;
  60. channels = 2;
  61. pMidBuffer = nullptr;
  62. pMidBufferUnaligned = nullptr;
  63. overlapLength = 0;
  64. bAutoSeqSetting = true;
  65. bAutoSeekSetting = true;
  66. tempo = 1.0f;
  67. setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
  68. setTempo(1.0f);
  69. clear();
  70. }
  71. TDStretch::~TDStretch()
  72. {
  73. delete[] pMidBufferUnaligned;
  74. }
  75. // Sets routine control parameters. These control are certain time constants
  76. // defining how the sound is stretched to the desired duration.
  77. //
  78. // 'sampleRate' = sample rate of the sound
  79. // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
  80. // 'seekwindowMS' = seeking window length for scanning the best overlapping
  81. // position (default = 28 ms)
  82. // 'overlapMS' = overlapping length (default = 12 ms)
  83. void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
  84. int aSeekWindowMS, int aOverlapMS)
  85. {
  86. // accept only positive parameter values - if zero or negative, use old values instead
  87. if (aSampleRate > 0)
  88. {
  89. if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate");
  90. this->sampleRate = aSampleRate;
  91. }
  92. if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
  93. if (aSequenceMS > 0)
  94. {
  95. this->sequenceMs = aSequenceMS;
  96. bAutoSeqSetting = false;
  97. }
  98. else if (aSequenceMS == 0)
  99. {
  100. // if zero, use automatic setting
  101. bAutoSeqSetting = true;
  102. }
  103. if (aSeekWindowMS > 0)
  104. {
  105. this->seekWindowMs = aSeekWindowMS;
  106. bAutoSeekSetting = false;
  107. }
  108. else if (aSeekWindowMS == 0)
  109. {
  110. // if zero, use automatic setting
  111. bAutoSeekSetting = true;
  112. }
  113. calcSeqParameters();
  114. calculateOverlapLength(overlapMs);
  115. // set tempo to recalculate 'sampleReq'
  116. setTempo(tempo);
  117. }
  118. /// Get routine control parameters, see setParameters() function.
  119. /// Any of the parameters to this function can be nullptr, in such case corresponding parameter
  120. /// value isn't returned.
  121. void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
  122. {
  123. if (pSampleRate)
  124. {
  125. *pSampleRate = sampleRate;
  126. }
  127. if (pSequenceMs)
  128. {
  129. *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
  130. }
  131. if (pSeekWindowMs)
  132. {
  133. *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
  134. }
  135. if (pOverlapMs)
  136. {
  137. *pOverlapMs = overlapMs;
  138. }
  139. }
  140. // Overlaps samples in 'midBuffer' with the samples in 'pInput'
  141. void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
  142. {
  143. int i;
  144. SAMPLETYPE m1, m2;
  145. m1 = (SAMPLETYPE)0;
  146. m2 = (SAMPLETYPE)overlapLength;
  147. for (i = 0; i < overlapLength ; i ++)
  148. {
  149. pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
  150. m1 += 1;
  151. m2 -= 1;
  152. }
  153. }
  154. void TDStretch::clearMidBuffer()
  155. {
  156. memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
  157. }
  158. void TDStretch::clearInput()
  159. {
  160. inputBuffer.clear();
  161. clearMidBuffer();
  162. isBeginning = true;
  163. maxnorm = 0;
  164. maxnormf = 1e8;
  165. skipFract = 0;
  166. }
  167. // Clears the sample buffers
  168. void TDStretch::clear()
  169. {
  170. outputBuffer.clear();
  171. clearInput();
  172. }
  173. // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
  174. // to enable
  175. void TDStretch::enableQuickSeek(bool enable)
  176. {
  177. bQuickSeek = enable;
  178. }
  179. // Returns nonzero if the quick seeking algorithm is enabled.
  180. bool TDStretch::isQuickSeekEnabled() const
  181. {
  182. return bQuickSeek;
  183. }
  184. // Seeks for the optimal overlap-mixing position.
  185. int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
  186. {
  187. if (bQuickSeek)
  188. {
  189. return seekBestOverlapPositionQuick(refPos);
  190. }
  191. else
  192. {
  193. return seekBestOverlapPositionFull(refPos);
  194. }
  195. }
  196. // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
  197. // of 'ovlPos'.
  198. inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
  199. {
  200. #ifndef USE_MULTICH_ALWAYS
  201. if (channels == 1)
  202. {
  203. // mono sound.
  204. overlapMono(pOutput, pInput + ovlPos);
  205. }
  206. else if (channels == 2)
  207. {
  208. // stereo sound
  209. overlapStereo(pOutput, pInput + 2 * ovlPos);
  210. }
  211. else
  212. #endif // USE_MULTICH_ALWAYS
  213. {
  214. assert(channels > 0);
  215. overlapMulti(pOutput, pInput + channels * ovlPos);
  216. }
  217. }
  218. // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
  219. // routine
  220. //
  221. // The best position is determined as the position where the two overlapped
  222. // sample sequences are 'most alike', in terms of the highest cross-correlation
  223. // value over the overlapping period
  224. int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
  225. {
  226. int bestOffs;
  227. double bestCorr;
  228. int i;
  229. double norm;
  230. bestCorr = -FLT_MAX;
  231. bestOffs = 0;
  232. // Scans for the best correlation value by testing each possible position
  233. // over the permitted range.
  234. bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
  235. bestCorr = (bestCorr + 0.1) * 0.75;
  236. #pragma omp parallel for
  237. for (i = 1; i < seekLength; i ++)
  238. {
  239. double corr;
  240. // Calculates correlation value for the mixing position corresponding to 'i'
  241. #if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)
  242. // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
  243. // iterate the loop in sequential order
  244. // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions
  245. corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
  246. #else
  247. // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
  248. // as "calcCrossCorr", but saves time by reusing & updating previously stored
  249. // "norm" value
  250. corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
  251. #endif
  252. // heuristic rule to slightly favour values close to mid of the range
  253. double tmp = (double)(2 * i - seekLength) / (double)seekLength;
  254. corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
  255. // Checks for the highest correlation value
  256. if (corr > bestCorr)
  257. {
  258. // For optimal performance, enter critical section only in case that best value found.
  259. // in such case repeat 'if' condition as it's possible that parallel execution may have
  260. // updated the bestCorr value in the mean time
  261. #pragma omp critical
  262. if (corr > bestCorr)
  263. {
  264. bestCorr = corr;
  265. bestOffs = i;
  266. }
  267. }
  268. }
  269. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  270. adaptNormalizer();
  271. #endif
  272. // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
  273. clearCrossCorrState();
  274. return bestOffs;
  275. }
  276. // Quick seek algorithm for improved runtime-performance: First roughly scans through the
  277. // correlation area, and then scan surroundings of two best preliminary correlation candidates
  278. // with improved precision
  279. //
  280. // Based on testing:
  281. // - This algorithm gives on average 99% as good match as the full algorithm
  282. // - this quick seek algorithm finds the best match on ~90% of cases
  283. // - on those 10% of cases when this algorithm doesn't find best match,
  284. // it still finds on average ~90% match vs. the best possible match
  285. int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
  286. {
  287. #define _MIN(a, b) (((a) < (b)) ? (a) : (b))
  288. #define SCANSTEP 16
  289. #define SCANWIND 8
  290. int bestOffs;
  291. int i;
  292. int bestOffs2;
  293. float bestCorr, corr;
  294. float bestCorr2;
  295. double norm;
  296. // note: 'float' types used in this function in case that the platform would need to use software-fp
  297. bestCorr =
  298. bestCorr2 = -FLT_MAX;
  299. bestOffs =
  300. bestOffs2 = SCANWIND;
  301. // Scans for the best correlation value by testing each possible position
  302. // over the permitted range. Look for two best matches on the first pass to
  303. // increase possibility of ideal match.
  304. //
  305. // Begin from "SCANSTEP" instead of SCANWIND to make the calculation
  306. // catch the 'middlepoint' of seekLength vector as that's the a-priori
  307. // expected best match position
  308. //
  309. // Roughly:
  310. // - 15% of cases find best result directly on the first round,
  311. // - 75% cases find better match on 2nd round around the best match from 1st round
  312. // - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round
  313. for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP)
  314. {
  315. // Calculates correlation value for the mixing position corresponding
  316. // to 'i'
  317. corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
  318. // heuristic rule to slightly favour values close to mid of the seek range
  319. float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
  320. corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
  321. // Checks for the highest correlation value
  322. if (corr > bestCorr)
  323. {
  324. // found new best match. keep the previous best as 2nd best match
  325. bestCorr2 = bestCorr;
  326. bestOffs2 = bestOffs;
  327. bestCorr = corr;
  328. bestOffs = i;
  329. }
  330. else if (corr > bestCorr2)
  331. {
  332. // not new best, but still new 2nd best match
  333. bestCorr2 = corr;
  334. bestOffs2 = i;
  335. }
  336. }
  337. // Scans surroundings of the found best match with small stepping
  338. int end = _MIN(bestOffs + SCANWIND + 1, seekLength);
  339. for (i = bestOffs - SCANWIND; i < end; i++)
  340. {
  341. if (i == bestOffs) continue; // this offset already calculated, thus skip
  342. // Calculates correlation value for the mixing position corresponding
  343. // to 'i'
  344. corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
  345. // heuristic rule to slightly favour values close to mid of the range
  346. float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
  347. corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
  348. // Checks for the highest correlation value
  349. if (corr > bestCorr)
  350. {
  351. bestCorr = corr;
  352. bestOffs = i;
  353. }
  354. }
  355. // Scans surroundings of the 2nd best match with small stepping
  356. end = _MIN(bestOffs2 + SCANWIND + 1, seekLength);
  357. for (i = bestOffs2 - SCANWIND; i < end; i++)
  358. {
  359. if (i == bestOffs2) continue; // this offset already calculated, thus skip
  360. // Calculates correlation value for the mixing position corresponding
  361. // to 'i'
  362. corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
  363. // heuristic rule to slightly favour values close to mid of the range
  364. float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
  365. corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
  366. // Checks for the highest correlation value
  367. if (corr > bestCorr)
  368. {
  369. bestCorr = corr;
  370. bestOffs = i;
  371. }
  372. }
  373. // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
  374. clearCrossCorrState();
  375. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  376. adaptNormalizer();
  377. #endif
  378. return bestOffs;
  379. }
  380. /// For integer algorithm: adapt normalization factor divider with music so that
  381. /// it'll not be pessimistically restrictive that can degrade quality on quieter sections
  382. /// yet won't cause integer overflows either
  383. void TDStretch::adaptNormalizer()
  384. {
  385. // Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to
  386. // too low values during pauses in music
  387. if ((maxnorm > 1000) || (maxnormf > 40000000))
  388. {
  389. //norm averaging filter
  390. maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm;
  391. if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16))
  392. {
  393. // large values, so increase divider
  394. overlapDividerBitsNorm++;
  395. if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase
  396. }
  397. else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0))
  398. {
  399. // extra small values, decrease divider
  400. overlapDividerBitsNorm--;
  401. }
  402. }
  403. maxnorm = 0;
  404. }
  405. /// clear cross correlation routine state if necessary
  406. void TDStretch::clearCrossCorrState()
  407. {
  408. // default implementation is empty.
  409. }
  410. /// Calculates processing sequence length according to tempo setting
  411. void TDStretch::calcSeqParameters()
  412. {
  413. // Adjust tempo param according to tempo, so that variating processing sequence length is used
  414. // at various tempo settings, between the given low...top limits
  415. #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%)
  416. #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)
  417. // sequence-ms setting values at above low & top tempo
  418. #define AUTOSEQ_AT_MIN 90.0
  419. #define AUTOSEQ_AT_MAX 40.0
  420. #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
  421. #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
  422. // seek-window-ms setting values at above low & top tempoq
  423. #define AUTOSEEK_AT_MIN 20.0
  424. #define AUTOSEEK_AT_MAX 15.0
  425. #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
  426. #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
  427. #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
  428. double seq, seek;
  429. if (bAutoSeqSetting)
  430. {
  431. seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
  432. seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
  433. sequenceMs = (int)(seq + 0.5);
  434. }
  435. if (bAutoSeekSetting)
  436. {
  437. seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
  438. seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
  439. seekWindowMs = (int)(seek + 0.5);
  440. }
  441. // Update seek window lengths
  442. seekWindowLength = (sampleRate * sequenceMs) / 1000;
  443. if (seekWindowLength < 2 * overlapLength)
  444. {
  445. seekWindowLength = 2 * overlapLength;
  446. }
  447. seekLength = (sampleRate * seekWindowMs) / 1000;
  448. }
  449. // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
  450. // tempo, larger faster tempo.
  451. void TDStretch::setTempo(double newTempo)
  452. {
  453. int intskip;
  454. tempo = newTempo;
  455. // Calculate new sequence duration
  456. calcSeqParameters();
  457. // Calculate ideal skip length (according to tempo value)
  458. nominalSkip = tempo * (seekWindowLength - overlapLength);
  459. intskip = (int)(nominalSkip + 0.5);
  460. // Calculate how many samples are needed in the 'inputBuffer' to
  461. // process another batch of samples
  462. //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
  463. sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
  464. }
  465. // Sets the number of channels, 1 = mono, 2 = stereo
  466. void TDStretch::setChannels(int numChannels)
  467. {
  468. if (!verifyNumberOfChannels(numChannels) ||
  469. (channels == numChannels)) return;
  470. channels = numChannels;
  471. inputBuffer.setChannels(channels);
  472. outputBuffer.setChannels(channels);
  473. // re-init overlap/buffer
  474. overlapLength=0;
  475. setParameters(sampleRate);
  476. }
  477. // nominal tempo, no need for processing, just pass the samples through
  478. // to outputBuffer
  479. /*
  480. void TDStretch::processNominalTempo()
  481. {
  482. assert(tempo == 1.0f);
  483. if (bMidBufferDirty)
  484. {
  485. // If there are samples in pMidBuffer waiting for overlapping,
  486. // do a single sliding overlapping with them in order to prevent a
  487. // clicking distortion in the output sound
  488. if (inputBuffer.numSamples() < overlapLength)
  489. {
  490. // wait until we've got overlapLength input samples
  491. return;
  492. }
  493. // Mix the samples in the beginning of 'inputBuffer' with the
  494. // samples in 'midBuffer' using sliding overlapping
  495. overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
  496. outputBuffer.putSamples(overlapLength);
  497. inputBuffer.receiveSamples(overlapLength);
  498. clearMidBuffer();
  499. // now we've caught the nominal sample flow and may switch to
  500. // bypass mode
  501. }
  502. // Simply bypass samples from input to output
  503. outputBuffer.moveSamples(inputBuffer);
  504. }
  505. */
  506. // Processes as many processing frames of the samples 'inputBuffer', store
  507. // the result into 'outputBuffer'
  508. void TDStretch::processSamples()
  509. {
  510. int ovlSkip;
  511. int offset = 0;
  512. int temp;
  513. /* Removed this small optimization - can introduce a click to sound when tempo setting
  514. crosses the nominal value
  515. if (tempo == 1.0f)
  516. {
  517. // tempo not changed from the original, so bypass the processing
  518. processNominalTempo();
  519. return;
  520. }
  521. */
  522. // Process samples as long as there are enough samples in 'inputBuffer'
  523. // to form a processing frame.
  524. while ((int)inputBuffer.numSamples() >= sampleReq)
  525. {
  526. if (isBeginning == false)
  527. {
  528. // apart from the very beginning of the track,
  529. // scan for the best overlapping position & do overlap-add
  530. offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
  531. // Mix the samples in the 'inputBuffer' at position of 'offset' with the
  532. // samples in 'midBuffer' using sliding overlapping
  533. // ... first partially overlap with the end of the previous sequence
  534. // (that's in 'midBuffer')
  535. overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
  536. outputBuffer.putSamples((uint)overlapLength);
  537. offset += overlapLength;
  538. }
  539. else
  540. {
  541. // Adjust processing offset at beginning of track by not perform initial overlapping
  542. // and compensating that in the 'input buffer skip' calculation
  543. isBeginning = false;
  544. int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5);
  545. #ifdef ST_SIMD_AVOID_UNALIGNED
  546. // in SIMD mode, round the skip amount to value corresponding to aligned memory address
  547. if (channels == 1)
  548. {
  549. skip &= -4;
  550. }
  551. else if (channels == 2)
  552. {
  553. skip &= -2;
  554. }
  555. #endif
  556. skipFract -= skip;
  557. if (skipFract <= -nominalSkip)
  558. {
  559. skipFract = -nominalSkip;
  560. }
  561. }
  562. // ... then copy sequence samples from 'inputBuffer' to output:
  563. // crosscheck that we don't have buffer overflow...
  564. if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength))
  565. {
  566. continue; // just in case, shouldn't really happen
  567. }
  568. // length of sequence
  569. temp = (seekWindowLength - 2 * overlapLength);
  570. outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp);
  571. // Copies the end of the current sequence from 'inputBuffer' to
  572. // 'midBuffer' for being mixed with the beginning of the next
  573. // processing sequence and so on
  574. assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples());
  575. memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp),
  576. channels * sizeof(SAMPLETYPE) * overlapLength);
  577. // Remove the processed samples from the input buffer. Update
  578. // the difference between integer & nominal skip step to 'skipFract'
  579. // in order to prevent the error from accumulating over time.
  580. skipFract += nominalSkip; // real skip size
  581. ovlSkip = (int)skipFract; // rounded to integer skip
  582. skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip
  583. inputBuffer.receiveSamples((uint)ovlSkip);
  584. }
  585. }
  586. // Adds 'numsamples' pcs of samples from the 'samples' memory position into
  587. // the input of the object.
  588. void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
  589. {
  590. // Add the samples into the input buffer
  591. inputBuffer.putSamples(samples, nSamples);
  592. // Process the samples in input buffer
  593. processSamples();
  594. }
  595. /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
  596. void TDStretch::acceptNewOverlapLength(int newOverlapLength)
  597. {
  598. int prevOvl;
  599. assert(newOverlapLength >= 0);
  600. prevOvl = overlapLength;
  601. overlapLength = newOverlapLength;
  602. if (overlapLength > prevOvl)
  603. {
  604. delete[] pMidBufferUnaligned;
  605. pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
  606. // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
  607. pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
  608. clearMidBuffer();
  609. }
  610. }
  611. // Operator 'new' is overloaded so that it automatically creates a suitable instance
  612. // depending on if we've a MMX/SSE/etc-capable CPU available or not.
  613. void * TDStretch::operator new(size_t)
  614. {
  615. // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
  616. ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
  617. return newInstance();
  618. }
  619. TDStretch * TDStretch::newInstance()
  620. {
  621. uint uExtensions;
  622. uExtensions = detectCPUextensions();
  623. // Check if MMX/SSE instruction set extensions supported by CPU
  624. #ifdef SOUNDTOUCH_ALLOW_MMX
  625. // MMX routines available only with integer sample types
  626. if (uExtensions & SUPPORT_MMX)
  627. {
  628. return ::new TDStretchMMX;
  629. }
  630. else
  631. #endif // SOUNDTOUCH_ALLOW_MMX
  632. #ifdef SOUNDTOUCH_ALLOW_SSE
  633. if (uExtensions & SUPPORT_SSE)
  634. {
  635. // SSE support
  636. return ::new TDStretchSSE;
  637. }
  638. else
  639. #endif // SOUNDTOUCH_ALLOW_SSE
  640. {
  641. // ISA optimizations not supported, use plain C version
  642. return ::new TDStretch;
  643. }
  644. }
  645. //////////////////////////////////////////////////////////////////////////////
  646. //
  647. // Integer arithmetic specific algorithm implementations.
  648. //
  649. //////////////////////////////////////////////////////////////////////////////
  650. #ifdef SOUNDTOUCH_INTEGER_SAMPLES
  651. // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
  652. // version of the routine.
  653. void TDStretch::overlapStereo(short *poutput, const short *input) const
  654. {
  655. int i;
  656. short temp;
  657. int cnt2;
  658. for (i = 0; i < overlapLength ; i ++)
  659. {
  660. temp = (short)(overlapLength - i);
  661. cnt2 = 2 * i;
  662. poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength;
  663. poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
  664. }
  665. }
  666. // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
  667. // version of the routine.
  668. void TDStretch::overlapMulti(short *poutput, const short *input) const
  669. {
  670. short m1;
  671. int i = 0;
  672. for (m1 = 0; m1 < overlapLength; m1 ++)
  673. {
  674. short m2 = (short)(overlapLength - m1);
  675. for (int c = 0; c < channels; c ++)
  676. {
  677. poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
  678. i++;
  679. }
  680. }
  681. }
  682. // Calculates the x having the closest 2^x value for the given value
  683. static int _getClosest2Power(double value)
  684. {
  685. return (int)(log(value) / log(2.0) + 0.5);
  686. }
  687. /// Calculates overlap period length in samples.
  688. /// Integer version rounds overlap length to closest power of 2
  689. /// for a divide scaling operation.
  690. void TDStretch::calculateOverlapLength(int aoverlapMs)
  691. {
  692. int newOvl;
  693. assert(aoverlapMs >= 0);
  694. // calculate overlap length so that it's power of 2 - thus it's easy to do
  695. // integer division by right-shifting. Term "-1" at end is to account for
  696. // the extra most significatnt bit left unused in result by signed multiplication
  697. overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
  698. if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9;
  699. if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3;
  700. newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1); // +1 => account for -1 above
  701. acceptNewOverlapLength(newOvl);
  702. overlapDividerBitsNorm = overlapDividerBitsPure;
  703. // calculate sloping divider so that crosscorrelation operation won't
  704. // overflow 32-bit register. Max. sum of the crosscorrelation sum without
  705. // divider would be 2^30*(N^3-N)/3, where N = overlap length
  706. slopingDivider = (newOvl * newOvl - 1) / 3;
  707. }
  708. double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm)
  709. {
  710. long corr;
  711. unsigned long lnorm;
  712. int i;
  713. #ifdef ST_SIMD_AVOID_UNALIGNED
  714. // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
  715. if (((ulongptr)mixingPos) & 15) return -1e50;
  716. #endif
  717. // hint compiler autovectorization that loop length is divisible by 8
  718. int ilength = (channels * overlapLength) & -8;
  719. corr = lnorm = 0;
  720. // Same routine for stereo and mono
  721. for (i = 0; i < ilength; i += 2)
  722. {
  723. corr += (mixingPos[i] * compare[i] +
  724. mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
  725. lnorm += (mixingPos[i] * mixingPos[i] +
  726. mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;
  727. // do intermediate scalings to avoid integer overflow
  728. }
  729. if (lnorm > maxnorm)
  730. {
  731. // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
  732. #pragma omp critical
  733. if (lnorm > maxnorm)
  734. {
  735. maxnorm = lnorm;
  736. }
  737. }
  738. // Normalize result by dividing by sqrt(norm) - this step is easiest
  739. // done using floating point operation
  740. norm = (double)lnorm;
  741. return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
  742. }
  743. /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
  744. double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
  745. {
  746. long corr;
  747. long lnorm;
  748. int i;
  749. // hint compiler autovectorization that loop length is divisible by 8
  750. int ilength = (channels * overlapLength) & -8;
  751. // cancel first normalizer tap from previous round
  752. lnorm = 0;
  753. for (i = 1; i <= channels; i ++)
  754. {
  755. lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm;
  756. }
  757. corr = 0;
  758. // Same routine for stereo and mono.
  759. for (i = 0; i < ilength; i += 2)
  760. {
  761. corr += (mixingPos[i] * compare[i] +
  762. mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
  763. }
  764. // update normalizer with last samples of this round
  765. for (int j = 0; j < channels; j ++)
  766. {
  767. i --;
  768. lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm;
  769. }
  770. norm += (double)lnorm;
  771. if (norm > maxnorm)
  772. {
  773. maxnorm = (unsigned long)norm;
  774. }
  775. // Normalize result by dividing by sqrt(norm) - this step is easiest
  776. // done using floating point operation
  777. return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
  778. }
  779. #endif // SOUNDTOUCH_INTEGER_SAMPLES
  780. //////////////////////////////////////////////////////////////////////////////
  781. //
  782. // Floating point arithmetic specific algorithm implementations.
  783. //
  784. #ifdef SOUNDTOUCH_FLOAT_SAMPLES
  785. // Overlaps samples in 'midBuffer' with the samples in 'pInput'
  786. void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
  787. {
  788. int i;
  789. float fScale;
  790. float f1;
  791. float f2;
  792. fScale = 1.0f / (float)overlapLength;
  793. f1 = 0;
  794. f2 = 1.0f;
  795. for (i = 0; i < 2 * (int)overlapLength ; i += 2)
  796. {
  797. pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
  798. pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
  799. f1 += fScale;
  800. f2 -= fScale;
  801. }
  802. }
  803. // Overlaps samples in 'midBuffer' with the samples in 'input'.
  804. void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
  805. {
  806. int i;
  807. float fScale;
  808. float f1;
  809. float f2;
  810. fScale = 1.0f / (float)overlapLength;
  811. f1 = 0;
  812. f2 = 1.0f;
  813. i=0;
  814. for (int i2 = 0; i2 < overlapLength; i2 ++)
  815. {
  816. // note: Could optimize this slightly by taking into account that always channels > 2
  817. for (int c = 0; c < channels; c ++)
  818. {
  819. pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
  820. i++;
  821. }
  822. f1 += fScale;
  823. f2 -= fScale;
  824. }
  825. }
  826. /// Calculates overlapInMsec period length in samples.
  827. void TDStretch::calculateOverlapLength(int overlapInMsec)
  828. {
  829. int newOvl;
  830. assert(overlapInMsec >= 0);
  831. newOvl = (sampleRate * overlapInMsec) / 1000;
  832. if (newOvl < 16) newOvl = 16;
  833. // must be divisible by 8
  834. newOvl -= newOvl % 8;
  835. acceptNewOverlapLength(newOvl);
  836. }
  837. /// Calculate cross-correlation
  838. double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
  839. {
  840. float corr;
  841. float norm;
  842. int i;
  843. #ifdef ST_SIMD_AVOID_UNALIGNED
  844. // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
  845. if (((ulongptr)mixingPos) & 15) return -1e50;
  846. #endif
  847. // hint compiler autovectorization that loop length is divisible by 8
  848. int ilength = (channels * overlapLength) & -8;
  849. corr = norm = 0;
  850. // Same routine for stereo and mono
  851. for (i = 0; i < ilength; i ++)
  852. {
  853. corr += mixingPos[i] * compare[i];
  854. norm += mixingPos[i] * mixingPos[i];
  855. }
  856. anorm = norm;
  857. return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
  858. }
  859. /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
  860. double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
  861. {
  862. float corr;
  863. int i;
  864. corr = 0;
  865. // cancel first normalizer tap from previous round
  866. for (i = 1; i <= channels; i ++)
  867. {
  868. norm -= mixingPos[-i] * mixingPos[-i];
  869. }
  870. // hint compiler autovectorization that loop length is divisible by 8
  871. int ilength = (channels * overlapLength) & -8;
  872. // Same routine for stereo and mono
  873. for (i = 0; i < ilength; i ++)
  874. {
  875. corr += mixingPos[i] * compare[i];
  876. }
  877. // update normalizer with last samples of this round
  878. for (int j = 0; j < channels; j ++)
  879. {
  880. i --;
  881. norm += mixingPos[i] * mixingPos[i];
  882. }
  883. return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
  884. }
  885. #endif // SOUNDTOUCH_FLOAT_SAMPLES