manual_BSS.cpp 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. /* manual_BSS.cpp
  2. *
  3. * Copyright (C) 2010-2014, 2015 David Weenink
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /*
  19. djmw 20101227 Initial version
  20. */
  21. #include "ManPagesM.h"
  /*
   * manual_BSS: defines the manual pages related to blind source separation.
   *
   * The body is a flat sequence of MAN_BEGIN (title, author, date) ... MAN_END
   * sections, one per page: CrossCorrelationTable, CrossCorrelationTableList,
   * MixingMatrix, the Sound commands that produce them, the "blind source
   * separation" and "reverberation time" background pages, and two literature
   * references. Inside a section every macro call (INTRO, NORMAL, TAG,
   * DEFINITION, CODE, FORMULA, SCRIPT, ...) contributes one paragraph of text.
   *
   * NOTE(review): the macros come from ManPagesM.h and are not visible here;
   * presumably they attach each page to `me` -- confirm against that header.
   */
  22. void manual_BSS (ManPages me);
  23. void manual_BSS (ManPages me)
  24. {
  25. MAN_BEGIN (U"CrossCorrelationTable", U"djmw", 20170908)
  26. INTRO (U"One of the types of objects in Praat. A ##CrossCorrelationTable# represents the cross-correlations between "
  27. "a number of signals. Cell [%i,%j] of a CrossCorrelationTable contains the cross-correlation between the %i-th "
  28. "and the %j-th signal. For example, the CrossCorrelationTable of an %n-channel sound is a %n\\xx%n table where "
  29. "the number in cell [%i,%j] is the cross-correlation of channel %i with channel %j (for a particular lag time %\\ta).")
  30. NORMAL (U"A CrossCorrelationTable has a square matrix whose cells contain the cross-correlations between "
  31. "the signals and a centroid vector with the average value of each signal.")
  32. ENTRY (U"Remarks")
  33. NORMAL (U"Sometimes in the statistical literature, the cross-correlation between signals is also called "
  34. "\"covariance\". However, the only thing a @@Covariance@ has in common with a CrossCorrelationTable is that "
  35. "both are symmetric matrices. The differences between a CrossCorrelationTable and a Covariance are:")
  36. TAG (U"1. A Covariance matrix is always positive-definite; for a cross-correlation table this is only guaranteed if "
  37. "the lag time %\\ta = 0.")
  38. TAG (U"2. The elements %%c__ij_% in a Covariance always satisfy |%%c__ij_%/\\Vr(%%c__ii_%\\.c%%c__jj_%)| \\<_ 1; this is "
  39. "generally not the case for cross-correlations.")
  40. MAN_END
  41. MAN_BEGIN (U"CrossCorrelationTableList", U"djmw", 20101227)
  42. INTRO (U"One of the types of objects in Praat. A CrossCorrelationTableList represents a collection of @@CrossCorrelationTable@ objects.")
  43. MAN_END
  44. MAN_BEGIN (U"CrossCorrelationTableList: Create test set...", U"djmw", 20110212)
  45. INTRO (U"Create a collection of @@CrossCorrelationTable@s that are all derived from different diagonal matrices by the same transformation matrix.")
  46. ENTRY (U"Settings")
  47. SCRIPT (5.4, Manual_SETTINGS_WINDOW_HEIGHT (4), U""
  48. Manual_DRAW_SETTINGS_WINDOW ("CrossCorrelationTableList: Create test set", 4)
  49. Manual_DRAW_SETTINGS_WINDOW_FIELD ("Matrix dimension", "5")
  50. Manual_DRAW_SETTINGS_WINDOW_FIELD ("Number of matrices", "20")
  51. Manual_DRAW_SETTINGS_WINDOW_BOOLEAN("First is positive-definite",1)
  52. Manual_DRAW_SETTINGS_WINDOW_FIELD ("Sigma", "0.02")
  53. )
  54. TAG (U"##Matrix dimension")
  55. DEFINITION (U"determines the size of the square matrix with cross-correlations.")
  56. TAG (U"##Number of matrices")
  57. DEFINITION (U"determines the number of matrices that have to be generated.")
  58. TAG (U"##First is positive-definite")
  59. DEFINITION (U"guarantees that the first matrix of the series is positive definite.")
  60. TAG (U"##Sigma")
  61. DEFINITION (U"the standard deviation of the noise that is added to each transformation matrix element. A value "
  62. "of zero makes all the cross-correlation matrices jointly diagonalizable. A value greater than zero "
  63. "makes each transformation matrix a little different and the collection not jointly "
  64. "diagonalizable anymore.")
  65. ENTRY (U"Algorithm")
  66. NORMAL (U"All the CrossCorrelationTable matrices are generated as #V\\'p\\.c#D__%k_\\.c #V, where #D__%k_ is a diagonal matrix "
  67. "with entries randomly chosen from the [-1,1] interval. The matrix #V is a \"random\" orthogonal matrix "
  68. "obtained from the singular value decomposition of a matrix #M = #U\\.c#D\\.c#V\\'p, where the cells of the "
  69. "matrix #M are random Gaussian numbers with mean 0 and standard deviation 1.")
  70. NORMAL (U"If the first matrix has to be positive definite, the numbers on the diagonal of #D__1_ are randomly "
  71. "chosen from the [0.1,1] interval.")
  72. MAN_END
  73. MAN_BEGIN (U"Create simple MixingMatrix...", U"djmw", 20170908)
  74. INTRO (U"Create a new @@MixingMatrix@.")
  75. ENTRY (U"Settings")
  76. TAG (U"##Number of inputs")
  77. DEFINITION (U"defines the number of input channels, i.e. the number of columns in the matrix.")
  78. TAG (U"##Number of outputs")
  79. DEFINITION (U"defines the number of output channels, i.e. the number of rows in the matrix.")
  80. TAG (U"##Mixing coefficients")
  81. DEFINITION (U"define the coefficients. You input them row-wise. There have to be %%numberOfOutputs% \\xx %%numberOfInputs% values. ")
  82. MAN_END
  83. MAN_BEGIN (U"MixingMatrix", U"djmw", 20170907)
  84. INTRO (U"One of the @@Types of objects|types of objects@ in Praat. A ##MixingMatrix# shows a mapping of the channels of an input @Sound to the channels of an output Sound. A channel in the input sound is called an %%input% channel. Each output channel is a linear combination of input channels. ")
  85. NORMAL (U"The mixing of input channels can be written as the matrix multiplication ##R=M\\.cS#. Here #S is the matrix that represents the input sound, with %%numberOfInputs% rows and %%numberOfSamples% columns. Each row in #S corresponds to one input channel. #M is the %%numberOfOutputChannels%\\xx %%numberOfInputs% MixingMatrix and #R is the %%numberOfOutputChannels%\\xx %%numberOfSamples% matrix that is the result of the mixing.")
  86. NORMAL (U"Row %i in the MixingMatrix #M therefore represents the weights %m__%ij_ of the different input channels %j in output channel %i, the number of rows of #M determines the number of output channels in the resulting #R. Column %j in #M represents the weight factors %m__%ij_ of input %j in the different output channels %i.")
  87. ENTRY (U"Examples")
  88. NORMAL (U"Given the following stereo Sound with a tone of 300 Hz in channel 1 and a tone of 600 Hz in channel two: ")
  89. CODE (U"stereo = Create Sound from formula: \"s\", 2, 0, 1, 44100, \"sin(2*pi*row*300*x)\"")
  90. TAG (U"Example 1")
  91. CODE (U"mm1 = Create simple MixingMatrix: \"mm1\", 2, 1, \"1 0\"")
  92. CODE (U"selectObject: mm1, stereo")
  93. CODE (U"Mix")
  94. DEFINITION (U"will produce a new %mono Sound object that shows a tone with a frequency of 300 Hz.")
  95. DEFINITION (U"The example creates a Mixing matrix with one row and two columns. The resulting new Sound object will have only one channel which is the result of adding the two channels from the stereo sound with weights of 1.0 and 0.0, respectively.")
  96. TAG (U"Example 2")
  97. CODE (U"mm2 = Create simple MixingMatrix: \"mm2\", 2, 1, \"0 1\"")
  98. CODE (U"selectObject: mm2, stereo")
  99. CODE (U"Mix")
  100. DEFINITION (U"will produce a new mono Sound object that shows a tone with a frequency of 600 Hz.")
  101. TAG (U"Example 3")
  102. CODE (U"mm3 = Create simple MixingMatrix: \"mm3\", 2, 1, \"1 1\"")
  103. CODE (U"selectObject: mm3, stereo")
  104. CODE (U"Mix")
  105. DEFINITION (U"will produce a new mono Sound object that shows a complex tone composed of frequencies 300 and 600 Hz. The amplitude of the output sound will be larger than 1")
  106. TAG (U"Example 4")
  107. CODE (U"mm4 = Create simple MixingMatrix: \"mm4\", 2, 2, \"1 0 1 0\"")
  108. CODE (U"selectObject: mm4, stereo")
  109. CODE (U"Mix")
  110. DEFINITION (U"will produce a new stereo Sound object that shows a tone of frequency 300 Hz in both channels.")
  111. TAG (U"Example 5")
  112. CODE (U"mm5 = Create simple MixingMatrix: \"mm5\", 2, 1, \"0.5 0.5\"")
  113. CODE (U"selectObject: mm5, stereo")
  114. CODE (U"Mix")
  115. DEFINITION (U"will produce a new mono Sound object that shows a complex tone composed of frequencies 300 and 600 Hz. The amplitudes of the output sound are now half the amplitude of the output sound of example 3.")
  116. TAG (U"Example 6")
  117. CODE (U"mono = Create Sound from formula: \"s\", 1, 0, 1, 44100, \"sin(2*pi*300*x)\"")
  118. CODE (U"mm6 = Create simple MixingMatrix: \"mm6\", 1, 2, \"1 1\"")
  119. CODE (U"selectObject: mm6, mono")
  120. CODE (U"Mix")
  121. DEFINITION (U"will produce from the mono input sound a stereo output sound that shows a tone of frequency 300 Hz in both channels.")
  122. TAG (U"Example 7")
  123. CODE (U"mm7 = Create simple MixingMatrix: \"mm7\", 2, 2, \"0 1 1 0\"")
  124. CODE (U"selectObject: mm7, stereo")
  125. CODE (U"Mix")
  126. DEFINITION (U"will interchange the channels.")
  127. TAG (U"Example 8")
  128. MAN_END
  129. MAN_BEGIN (U"MixingMatrix: Multiply input channel...", U"djmw", 20170908)
  130. INTRO (U"Multiply an input channel of the selected @@MixingMatrix@ by a value.")
  131. ENTRY (U"Examples")
  132. TAG (U"Increase the contribution of input channel 1 in each output channel by a factor of 2")
  133. CODE (U"Multiply input channel: 1, 2")
  134. TAG (U"Remove the contribution of channel 1 in each output channel")
  135. CODE (U"Multiply input channel: 1, 0")
  136. MAN_END
  137. MAN_BEGIN (U"Sound: To CrossCorrelationTable...", U"djmw", 20110212)
  138. INTRO (U"A command that creates a @@CrossCorrelationTable@ from every selected @@Sound@ object.")
  139. ENTRY (U"Settings")
  140. SCRIPT (5.4, Manual_SETTINGS_WINDOW_HEIGHT (2), U""
  141. Manual_DRAW_SETTINGS_WINDOW ("Sound: To CrossCorrelationTable", 2)
  142. Manual_DRAW_SETTINGS_WINDOW_RANGE("Time range", "0.0", "10.0")
  143. Manual_DRAW_SETTINGS_WINDOW_FIELD ("Lag time", "0.0")
  144. )
  145. TAG (U"##Time range (s)#,")
  146. DEFINITION (U"determines the time range over which the table is calculated.")
  147. TAG (U"##Lag time (s)#,")
  148. DEFINITION (U"determines the lag time.")
  149. ENTRY (U"Algorithm")
  150. NORMAL (U"The cross-correlation between channel %i and channel %j for lag time \\ta is defined as the "
  151. "discretized #integral")
  152. FORMULA (U"cross-corr (%c__%i_, %c__%j_) [%\\ta] \\=3 \\su__%t_ %c__%i_[%t] %c__%j_[%t+%\\ta] %%\\Det%,")
  153. NORMAL (U"where %t and %t+%\\ta are discrete times and %%\\Det% is the @@sampling period@. ")
  154. MAN_END
  155. MAN_BEGIN (U"Sound: To Covariance (channels)...", U"djmw", 20120303)
  156. INTRO (U"Determines the @@Covariance|covariances@ between the channels of a selected @Sound.")
  157. NORMAL (U"The covariance of a sound is determined by calculating the @@CrossCorrelationTable@ of a multichannel sound for a lag time equal to zero.")
  158. MAN_END
  159. MAN_BEGIN (U"Sound: To Sound (blind source separation)...", U"djmw", 20151030)
  160. INTRO (U"Analyze the selected multi-channel sound into its independent components by an iterative method.")
  161. NORMAL (U"The @@blind source separation@ method to find the independent components tries to simultaneously diagonalize a number of "
  162. "@@CrossCorrelationTable@s that are calculated from the multi-channel sound at different lag times.")
  163. ENTRY (U"Settings")
  164. SCRIPT (5.4, Manual_SETTINGS_WINDOW_HEIGHT (6), U""
  165. Manual_DRAW_SETTINGS_WINDOW ("Sound: To Sound (blind source separation)", 6)
  166. Manual_DRAW_SETTINGS_WINDOW_RANGE("Time range (s)", "0.0", "10.0")
  167. Manual_DRAW_SETTINGS_WINDOW_FIELD ("Number of cross-correlations", "20")
  168. Manual_DRAW_SETTINGS_WINDOW_FIELD ("Lag times", "0.002")
  169. Manual_DRAW_SETTINGS_WINDOW_FIELD ("Maximum number of iterations", "100")
  170. Manual_DRAW_SETTINGS_WINDOW_FIELD ("Tolerance", "0.001")
  171. Manual_DRAW_SETTINGS_WINDOW_OPTIONMENU("Diagonalization method", "ffdiag")
  172. )
  173. TAG (U"##Time range (s)")
  174. DEFINITION (U"defines the time range over which the ##CrossCorrelationTable#s of the sound will be calculated.")
  175. TAG (U"##Number of cross-correlations")
  176. DEFINITION (U"defines the number of ##CrossCorrelationTable#s to be calculated.")
  177. TAG (U"##Lag times")
  178. DEFINITION (U"defines the lag time %\\ta__0_ for the ##CrossCorrelationTable#s. These tables "
  179. "are calculated at lag times %\\ta__k_=(%k - 1)%\\ta__0_, where %k runs from 1 to %%numberOfCrosscorrelations%.")
  180. TAG (U"##Maximum number of iterations")
  181. DEFINITION (U"defines a stopping criterion for the iteration. The iteration will stop when this number is reached.")
  182. TAG (U"##Tolerance")
  183. DEFINITION (U"defines another stopping criterion that depends on the method used.")
  184. TAG (U"##Diagonalization method")
  185. DEFINITION (U"defines the method to determine the independent components.")
  186. ENTRY (U"Algorithm")
  187. NORMAL (U"This method tries to decompose the sound according to the %%instantaneous% mixing model")
  188. FORMULA (U"#Y=#A\\.c#X.")
  189. NORMAL (U"In this model #Y is a matrix with the selected multi-channel sound, #A is a so-called "
  190. "%%mixing matrix% and #X is a matrix with the independent components. "
  191. "Essentially the model says that each channel in the multi-channel sound is a linear combination of the "
  192. "independent sound components in #X. "
  193. "If we would know the mixing matrix #A we could easily solve the model above for #X by standard means. "
  194. "However, if we don't know #A and we don't know #X, the decomposition of #Y is underdetermined. This means there "
  195. "are an infinite number of possible combinations of #A and #X that result in the same #Y. ")
  196. NORMAL (U"One approach to solve the equation above is to make assumptions about the statistical properties "
  197. "of the components in the matrix #X: it turns out that a sufficient assumption is to assume that the "
  198. "components in #X at each time instant are %%statistically independent%. This is not an unrealistic "
  199. "assumption in many cases, although in practice it need not be exactly the case. Another assumption is "
  200. "that the mixing matrix is constant, which means that the mixing conditions did not change during the recording of the sound." )
  201. NORMAL (U"The theory says that statistically independent signals are not correlated (although the reverse "
  202. "is not always true: signals that are not correlated don't have to be statistically independent). "
  203. "The methods implemented here all follow this lead as follows. If we calculate the @@CrossCorrelationTable@ "
  204. "for the left and the right side signals of the equation above, then, "
  205. "for the multi-channel sound #Y this will result in a cross-correlation matrix #C. For the right side we "
  206. "obtain #A\\.c#D\\.c#A\\'p, where #D is a diagonal matrix because all the cross-correlations between "
  207. "different independent components are zero by definition. This results in the following identity: ")
  208. FORMULA (U"#C(\\ta)=#A\\.c#D(\\ta)\\.c#A\\'p, for all values of the lag time \\ta.")
  209. NORMAL (U"This equation says that, given the model, the cross-correlation matrix can be diagonalized for "
  210. "all values of the lag time %%by the same transformation matrix% #A.")
  211. NORMAL (U"If we calculate the cross-correlation matrices for a number of different lag times, say 20, we "
  212. "then have to obtain the matrix #A that diagonalizes them all. Unfortunately there is no closed form solution "
  213. "that diagonalizes more than two matrices at the same time and we have to resort to iterative "
  214. "algorithms for joint diagonalization. ")
  215. NORMAL (U"Two of these algorithms are the ##qdiag# method as described in @@Vollgraf & Obermayer (2006)@ "
  216. "and the ##ffdiag# method as described in @@Ziehe et al. (2004)@. ")
  217. NORMAL (U"Unfortunately the convergence criteria of these two algorithms cannot easily be compared as "
  218. "the criterion for the ##ffdiag# algorithm is the relative change of the square root of the sum of the "
  219. "squared off-diagonal "
  220. "elements of the transformed cross-correlation matrices and the criterion for ##qdiag# is the largest "
  221. "change in the eigenvectors norm during an iteration.")
  222. ENTRY (U"Example")
  223. NORMAL (U"We start by creating a speech synthesizer that we need to create two sounds. We will mix the two sounds and finally our blind source separation software will try to undo our mixing by extracting the two original sounds as well as possible from the two mixtures.")
  224. CODE(U"synth = Create SpeechSynthesizer: \"English (Great Britain)\", \"Female1\"")
  225. CODE(U"s1 = To Sound: \"This is some text\", \"no\"")
  226. NORMAL (U"The first speech sound was created from the text \"This is some text\" at a speed of 175 words per minute.")
  227. CODE(U"selectObject: synth")
  228. CODE(U"Set speech output settings: 44100, 0.01, 80, 50, 145, \"no\", \"IPA\"")
  229. CODE(U"s2 = To Sound: \"Abracadabra, abra\", \"no\"")
  230. NORMAL (U"The second sound \"Abracadabra, abra\" was synthesized at 145 words per minute with a somewhat larger pitch excursion (80) than the previous sound (50).")
  231. CODE(U"plusObject: s1")
  232. CODE(U"stereo = Combine to stereo")
  233. NORMAL (U"We combine the two separate sounds into one stereo sound because our blind source separation works on multichannel sounds only.")
  234. CODE(U"mm = Create simple MixingMatrix: \"mm\", 2, 2, \"1.0 2.0 2.0 1.0\"")
  235. NORMAL (U"A two by two MixingMatrix is created.")
  236. CODE(U"plusObject: stereo")
  237. CODE(U"Mix")
  238. NORMAL (U"The last command, Mix, creates a new two-channel sound where each channel is a linear mixture of the two "
  239. "channels in the stereo sound, i.e. channel 1 is the sum of s1 and s2 with mixture strengths of 1 and 2, respectively. "
  240. "The second channel is also the sum of s1 and s2 but now with mixture strengths 2 and 1, respectively.")
  241. CODE (U"To Sound (blind source separation): 0.1, 1, 20, 0.0002, 100, 0.001, \"ffdiag\"")
  242. NORMAL (U"The two channels in the new sound that results from this command contain a reasonable approximation of "
  243. "the two originating sounds.")
  244. NORMAL (U"The top panel shows the two speech sounds \"This is some text\" and \"abracadabra, abra\". "
  245. "The middle panel shows the two mixed sounds while the lower panel shows the two sounds after unmixing.")
  246. SCRIPT (6, 6, U" "
  247. "syn = Create SpeechSynthesizer: \"English (Great Britain)\", \"Female1\"\n"
  248. "s1 = To Sound: \"This is some text\", \"no\"\n"
  249. "selectObject: syn\n"
  250. "Set speech output settings: 44100, 0.01, 80, 50, 145, \"no\", \"IPA\"\n"
  251. "s2 = To Sound: \"abracadabra, abra\", \"no\"\n"
  252. "plusObject: s1\n"
  253. "stereo = Combine to stereo\n"
  254. "Select inner viewport: 1, 6, 0.1, 1.9\n"
  255. "Draw: 0, 0, 0, 0, \"no\", \"Curve\"\n"
  256. "Draw inner box\n"
  257. "mm = Create simple MixingMatrix: \"mm\", 2, 2, \"1.0 2.0 2.0 1.0\"\n"
  258. "plusObject: stereo\n"
  259. "mixed = Mix\n"
  260. "Select inner viewport: 1, 6, 2.1, 3.9\n"
  261. "Draw: 0, 0, 0, 0, \"no\", \"Curve\"\n"
  262. "Draw inner box\n"
  263. "unmixed = To Sound (bss): 0.1, 1, 20, 0.00021, 100, 0.001, \"ffdiag\"\n"
  264. "Select inner viewport: 1, 6, 4.1, 5.9\n"
  265. "Draw: 0, 0, 0, 0, \"no\", \"Curve\"\n"
  266. "Draw inner box\n"
  267. "removeObject: unmixed, syn, stereo, s1, s2, mixed, mm\n"
  268. )
  269. NORMAL (U"The first two panels will not change between different sessions of praat. The last panel, which shows "
  270. "the result of the blind source separation, i.e. unmixing, will not always be the same because of two things. In the first place the unmixing always starts with an initialisation with random values of the parameters that "
  271. "we have to determine for the blind source separation. Therefore the iteration sequence will never be the same and the final outcomes might differ. In the second place, as was explained in the @@blind source separation@ manual, the unmixing is only "
  272. "unique up to a scale factor and a permutation. Therefore the channels in the unmixed sound do not necessarily correspond to the corresponding channel in our \"original\" stereo sound.")
  273. NORMAL (U"The complete script:")
  274. CODE (U"syn = Create SpeechSynthesizer: \"English (Great Britain)\", \"Female1\"")
  275. CODE (U"s1 = To Sound: \"This is some text\", \"no\"")
  276. CODE (U"selectObject: syn")
  277. CODE (U"Set speech output settings: 44100, 0.01, 80, 50, 145, \"no\", \"IPA\"")
  278. CODE (U"s2 = To Sound: \"abracadabra, abra\", \"no\"")
  279. CODE (U"plusObject: s1")
  280. CODE (U"stereo = Combine to stereo")
  281. CODE (U"Select inner viewport: 1, 6, 0.1, 1.9")
  282. CODE (U"Draw: 0, 0, 0, 0, \"no\", \"Curve\"")
  283. CODE (U"Draw inner box")
  284. CODE (U"mm = Create simple MixingMatrix: \"mm\", 2, 2, \"1.0 2.0 2.0 1.0\"")
  285. CODE (U"plusObject: stereo")
  286. CODE (U"mixed = Mix")
  287. CODE (U"Select inner viewport: 1, 6, 2.1, 3.9")
  288. CODE (U"Draw: 0, 0, 0, 0, \"no\", \"Curve\"")
  289. CODE (U"Draw inner box")
  290. CODE (U"unmixed = To Sound (bss): 0.1, 1, 20, 0.00021, 100, 0.001, \"ffdiag\"")
  291. CODE (U"Select inner viewport: 1, 6, 4.1, 5.9")
  292. CODE (U"Draw: 0, 0, 0, 0, \"no\", \"Curve\"")
  293. CODE (U"Draw inner box")
  294. CODE (U"removeObject: unmixed, syn, stereo, s1, s2, mixed, mm")
  295. MAN_END
  296. MAN_BEGIN (U"Sound: To Sound (whiten channels)...", U"djmw", 20120303)
  297. INTRO (U"Transforms the channels of the selected @Sound linearly to make them white, i.e. the new channels will be uncorrelated and their variances equal unity.")
  298. ENTRY (U"Settings")
  299. TAG (U"##Variance fraction to keep#,")
  300. DEFINITION (U"determines, indirectly, how many channels the final sound will have.")
  301. ENTRY (U"Algorithm")
  302. NORMAL (U"We start by determining the @@Sound: To Covariance (channels)...|covariance@ of the selected sound. "
  303. "Next a @@Principal component analysis|principal component analysis@ determines the eigenvalues and eigenvectors of the covariance matrix. The settings of the variance fraction to keep determines how many eigenvalues and eigenvectors we use for the whitening. This number, %p, will also be equal to the number of channels of the resulting whitened sound.")
  304. NORMAL (U"In mathematical terms. For an %n-channel sound, if #E is the matrix with the eigenvectors and #D=diag (%d__1_, %d__2_,..., %d__n_) is the diagonal matrix with the "
  305. "eigenvalues of the covariance matrix, then the whitening matrix is #W = #E#D^^-1/2^#E\\'p, where #D^^-1/2^=diag (%d__1_^^-1/2^, ..., %d__p_^^-1/2^, 0, ..., 0). Only the %p most important eigenvalues have been retained, where %p was determined as the smallest integer for which (%d__1_+%d__2_+ ... + %d__%p_)/(%d__1_+%d__2_+ ... + %d__%n_) >= %%varianceFractionToKeep%.")
  306. NORMAL (U"The resulting sound samples of the whitened sound, %w__%ij_, are then calculated from the samples of the "
  307. "original sound, %s__%kj_, as %w__%ij_ = \\Si__%k_ W__%ik_ %s__%kj_, where 1 \\<_ %i \\<_%p, 1 \\<_ %j \\<_ numberOfSamples and 1 \\<_ %k \\<_ %n.")
  308. MAN_END
  309. MAN_BEGIN (U"blind source separation", U"djmw", 20120907)
  310. INTRO (U"Blind source separation (BSS) is a technique for estimating individual source components from their mixtures "
  311. "at multiple sensors. It is called %blind because we don't use any other information besides the mixtures. ")
  312. NORMAL (U"For example, imagine a room with a number of persons present and a number of microphones for recording. "
  313. "When one or more persons are speaking at the same time, each microphone registers a different %mixture of the individual speakers' audio signals. It is the task of BSS to untangle these mixtures into their sources, i.e. the individual speakers' audio signals. "
  314. "In general, this is a difficult problem because of several complicating factors. ")
  315. LIST_ITEM (U"\\bu Different locations of speakers and microphones in the room: the individual speakers' audio signals do not reach all microphones at the same time. ")
  316. LIST_ITEM (U"\\bu Room acoustics: the signal that reaches a microphone is composed of the signal that %directly travels to the microphone and parts that come from room reverberations and echos. ")
  317. LIST_ITEM (U"\\bu Varying distances to microphones: one or more speakers might be moving. This makes the mixing time dependent.")
  318. NORMAL (U"If the number of sensors is %larger than the number of sources we speak of an %overdetermined problem. If the number of sensors and the number of sources are %equal we speak of a %determined problem. The more difficult problem is the %underdetermined one where the number of sensors is %less than the number of sources.")
  319. ENTRY (U"Typology of mixtures")
  320. NORMAL (U"In general two different types of mixtures are considered in the literature: %%instantaneous "
  321. "mixtures% and %%convolutive mixtures%. ")
  322. TAG (U"%%Instantaneous mixtures%")
  323. DEFINITION (U"where the mixing is instantaneous, corresponds to the model #Y=#A\\.c#X. In this model #Y is a matrix with the recorded microphone sounds, #A is a so-called "
  324. "%%mixing matrix% and #X is a matrix with the independent source signals. "
  325. "Essentially the model says that the signal that each microphone records is a (possibly different) linear combination of the %same source signals. "
  326. "If we would know the mixing matrix #A we could easily solve the model above for #X by standard means. "
  327. "However, in general we don't know #A and #X and there are an infinite number of possible decompositions for #Y. The problem is however solvable by making some (mild) assumptions about #A and #X. ")
  328. TAG (U"%%Convolutive mixtures%")
  329. DEFINITION (U"are mixtures where the mixing is of convolutive nature, i.e. the model is ")
  330. FORMULA (U"%%y__i_ (n)% = \\Si__%j_^^%d^\\Si__%\\ta_^^M__%ij_-1^ %%h__ij_(\\ta)x__j_(n-\\ta) + N__i_(n)%, for %i=1..m.")
  331. DEFINITION (U"Here %%y__i_ (n)% is the %n-th sample of the %i-th microphone signal, %m is the number of microphones, %%h__ij_(\\ta)% is the multi-input multi-output linear filter with the source-microphone impulse responses that characterize the propagation of the sound in the room and %%N__i_% is a noise source. This model is typically much harder to solve than the previous one because of the %%h__ij_(\\ta)% filter term that can have thousands of coefficients. For example, the typical @@reverberation time@ of a room is approximately 0.3 s which corresponds to 2400 samples, i.e. filter coefficients, for an 8 kHz sampled sound.")
  332. ENTRY (U"Solving the blind source separation for instantaneous mixtures")
  333. NORMAL (U"Various techniques exist for solving the blind source separation problem for %instantaneous mixtures. Very popular ones make use of second order statistics (SOS) by trying to "
  334. "simultaneously diagonalize a large number of cross-correlation matrices. Other techniques like independent component analysis use higher order statistics (HOS) to find the independent components, i.e. the sources.")
  335. NORMAL (U"Given the decomposition problem #Y=#A\\.c#X, we can see that the solution is determined "
  336. "only up to a permutation and a scaling of the components. This is called the %%indeterminacy "
  337. "problem% of BSS. This can be seen as follows: given a permutation matrix #P, i.e. a matrix which "
  338. "contains only zeros except for one 1 in every row and column, and a diagonal scaling matrix #D, any "
  339. "scaling and permutation of the independent components #X__%n_=(#D\\.c#P)\\.c#X can be compensated "
  340. "by the reversed scaling of the mixing matrix #A__%n_=#A\\.c(#D\\.c#P)^^-1^ because #A\\.c(#D\\.c#P)^^-1^\\.c(#D\\.c#P)\\.c#X = #A\\.c#X = #Y. ")
  341. ENTRY (U"Solving the blind source separation for convolutive mixtures")
  342. NORMAL (U"Solutions for %convolutive mixture problems are much harder to achieve. "
  343. "One normally starts by transforming the problem to the frequency domain where the "
  344. "convolution is turned into a multiplication. The problem then translates into a separate "
  345. "%%instantaneous% mixing problem for %%each% frequency in the frequency domain. It is here that "
  346. "the indeterminacy problem hits us because it is not clear beforehand how to combine the "
  347. "independent components of each frequency bin.")
  348. MAN_END
  349. MAN_BEGIN (U"reverberation time", U"djmw", 20110107)
  350. NORMAL (U"Reverberation is the persistence of sound in a room after the sound source has silenced. ")
  351. NORMAL (U"The %%reverberation time% is normally defined as the time required for the persistence of a direct sound "
  352. "to decay by 60 dB after the direct sound has silenced. Sometimes this dB level is indicated with a subscript "
  353. "and the reverberation time is given by the symbol %T__60_. "
  354. "The reverberation time depends mainly on a room's volume and area and on the absorption at the walls. Generally absorption is frequency dependent and therefore the reverberation time of a room varies with frequency. ")
  355. MAN_END
  356. MAN_BEGIN (U"Vollgraf & Obermayer (2006)", U"djmw", 20110105)
  357. NORMAL (U"Roland Vollgraf & Klaus Obermayer (2006): \"Quadratic optimization for simultaneous matrix "
  358. "diagonalization.\" %%IEEE Transactions On Signal Processing% #54: 3270\\--3278.")
  359. MAN_END
  360. MAN_BEGIN (U"Ziehe et al. (2004)", U"djmw", 20110105)
  361. NORMAL (U"Andreas Ziehe, Pavel Laskov, Guido Nolte & Klaus-Robert M\\u\"ller (2004): \"A fast algorithm for joint "
  362. "diagonalization with non-orthogonal transformations and its application to blind source separation\", "
  363. "%%Journal of Machine Learning Research% #5: 777\\--800.")
  364. MAN_END
  365. }
  366. /* End of file manual_BSS.cpp */