NUM.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. /* NUM.cpp
  2. *
  3. * Copyright (C) 2017,2018 Paul Boersma
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. * See the GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #include "melder.h"
  19. double NUMcenterOfGravity (const constVEC& x) noexcept {
  20. longdouble weightedSumOfIndexes = 0.0, sumOfWeights = 0.0;
  21. for (integer i = 1; i <= x.size; i ++) {
  22. weightedSumOfIndexes += i * x [i];
  23. sumOfWeights += x [i];
  24. }
  25. return double (weightedSumOfIndexes / sumOfWeights);
  26. }
  27. double NUMcolumnMean (const constMAT& x, integer columnNumber) noexcept {
  28. Melder_assert (columnNumber > 0 && columnNumber <= x.ncol);
  29. integer stride = x.ncol;
  30. PAIRWISE_SUM (longdouble, sum, integer, x.nrow,
  31. const double *px = & x [1] [columnNumber],
  32. (longdouble) *px,
  33. px += stride
  34. )
  35. return double (sum / x.nrow);
  36. }
  37. double NUMcolumnSum (const constMAT& x, integer columnNumber) noexcept {
  38. Melder_assert (columnNumber > 0 && columnNumber <= x.nrow);
  39. integer stride = x.ncol;
  40. PAIRWISE_SUM (longdouble, sum, integer, x.nrow,
  41. const double *px = & x [1] [columnNumber],
  42. (longdouble) *px,
  43. px += stride
  44. )
  45. return double (sum);
  46. }
  47. double NUMinner_ (const constVEC& x, const constVEC& y) noexcept {
  48. PAIRWISE_SUM (longdouble, sum, integer, x.size,
  49. const double *px = & x [1];
  50. const double *py = & y [1],
  51. (longdouble) *px * (longdouble) *py,
  52. (++ px, ++ py)
  53. )
  54. return double (sum);
  55. }
  56. double NUMnorm (const constVEC& x, double power) noexcept {
  57. if (power < 0.0) return undefined;
  58. if (power == 2.0) {
  59. PAIRWISE_SUM (longdouble, sum, integer, x.size,
  60. const double *px = & x [1],
  61. (longdouble) *px * (longdouble) *px,
  62. ++ px
  63. )
  64. return sqrt ((double) sum);
  65. } else if (power == 1.0) {
  66. PAIRWISE_SUM (longdouble, sum, integer, x.size,
  67. const double *px = & x [1],
  68. (longdouble) fabs (*px),
  69. ++ px
  70. )
  71. return (double) sum;
  72. } else {
  73. PAIRWISE_SUM (longdouble, sum, integer, x.size,
  74. const double *px = & x [1],
  75. powl ((longdouble) fabs (*px), power),
  76. ++ px
  77. )
  78. return (double) powl (sum, (longdouble) 1.0 / power);
  79. }
  80. }
  81. integer NUMnumberOfTokens (conststring32 string) noexcept {
  82. integer numberOfTokens = 0;
  83. const char32 *p = & string [0];
  84. for (;;) {
  85. Melder_skipHorizontalOrVerticalSpace (& p);
  86. if (*p == U'\0')
  87. break;
  88. numberOfTokens ++;
  89. p ++; // step over first nonspace
  90. p = Melder_findEndOfInk (p);
  91. }
  92. return numberOfTokens;
  93. }
  94. double NUMstdev (const constVEC& x) noexcept {
  95. double stdev;
  96. NUM_sum_mean_sumsq_variance_stdev (x, nullptr, nullptr, nullptr, nullptr, & stdev);
  97. return stdev;
  98. }
  99. void NUM_sum_mean (const constVEC& x, double *out_sum, double *out_mean) noexcept {
  100. if (x.size <= 4) {
  101. switch (x.size) {
  102. case 0: {
  103. if (out_sum) *out_sum = 0.0;
  104. if (out_mean) *out_mean = undefined;
  105. } break; case 1: {
  106. if (out_sum) *out_sum = x [1];
  107. if (out_mean) *out_mean = x [1];
  108. } break; case 2: {
  109. longdouble sum = (longdouble) x [1] + (longdouble) x [2];
  110. if (out_sum) *out_sum = (double) sum;
  111. if (out_mean) *out_mean = double (0.5 * sum);
  112. } break; case 3: {
  113. longdouble sum = (longdouble) x [1] + (longdouble) x [2] + (longdouble) x [3];
  114. if (out_sum) *out_sum = (double) sum;
  115. if (out_mean) *out_mean = double ((1.0 / (longdouble) 3.0) * sum);
  116. } break; case 4: {
  117. longdouble sum = ((longdouble) x [1] + (longdouble) x [2]) + ((longdouble) x [3] + (longdouble) x [4]);
  118. if (out_sum) *out_sum = (double) sum;
  119. if (out_mean) *out_mean = double (0.25 * sum);
  120. } break; default: {
  121. if (out_sum) *out_sum = undefined;
  122. if (out_mean) *out_mean = undefined;
  123. }
  124. }
  125. return;
  126. }
  127. if (Melder_debug == 0 || Melder_debug < 48 || Melder_debug > 51) {
  128. PAIRWISE_SUM (longdouble, sum, integer, x.size, const double *px = & x [1], (longdouble) *px, ++ px)
  129. if (out_sum) *out_sum = (double) sum;
  130. if (out_mean) *out_mean = double (sum / x.size); // it helps a bit to perform the division while still in longdouble
  131. } else if (Melder_debug == 48) {
  132. SEQUENTIAL_SUM (double, sum, integer, x.size, const double *px = & x [1], *px, ++ px)
  133. if (out_sum) *out_sum = (double) sum;
  134. if (out_mean) *out_mean = double (sum / x.size);
  135. } else if (Melder_debug == 49) {
  136. SEQUENTIAL_SUM (longdouble, sum, integer, x.size, const double *px = & x [1], *px, ++ px)
  137. if (out_sum) *out_sum = (double) sum;
  138. if (out_mean) *out_mean = double (sum / x.size);
  139. } else if (Melder_debug == 50) {
  140. KAHAN_SUM (longdouble, sum, integer, x.size, const double *px = & x [1], *px, ++ px)
  141. if (out_sum) *out_sum = (double) sum;
  142. if (out_mean) *out_mean = double (sum / x.size);
  143. } else if (Melder_debug == 51) {
  144. TWO_LOOP_SUM (longdouble, sum, integer, x.size, const double *px = & x [1], *px, ++ px)
  145. if (out_sum) *out_sum = (double) sum;
  146. if (out_mean) *out_mean = double (sum / x.size);
  147. }
  148. }
  149. void NUM_sum_mean_sumsq_variance_stdev (const constVEC& x,
  150. double *out_sum, double *out_mean, double *out_sumsq, double *out_variance, double *out_stdev) noexcept
  151. {
  152. if (x.size < 2) {
  153. if (x.size <= 0) {
  154. if (out_sum) *out_sum = 0.0;
  155. if (out_mean) *out_mean = undefined;
  156. if (out_sumsq) *out_sumsq = undefined;
  157. } else {
  158. if (out_sum) *out_sum = x [1];
  159. if (out_mean) *out_mean = x [1];
  160. if (out_sumsq) *out_sumsq = 0.0;
  161. }
  162. if (out_variance) *out_variance = undefined;
  163. if (out_stdev) *out_stdev = undefined;
  164. return;
  165. }
  166. if (Melder_debug != 0) {
  167. if (Melder_debug == 48) {
  168. /*
  169. Naive implementation in double.
  170. */
  171. double sum = 0.0; // -> sum in R (invariant) [R is the set of real numbers]
  172. for (integer i = 1; i <= x.size; i ++)
  173. sum += x [i]; // sum before in R, x [i] in R -> sum after in R
  174. if (out_sum) *out_sum = sum;
  175. double mean = sum / x.size; // sum in R, x.size != 0 -> mean in R
  176. if (out_mean) *out_mean = mean;
  177. if (! out_sumsq && ! out_variance && ! out_stdev) return;
  178. double sumOfSquaredResiduals = 0.0; // -> sumOfSquares >= 0.0 (invariant)
  179. for (integer i = 1; i <= x.size; i ++) {
  180. double residual = x [i] - mean; // x [i] in R, mean in R -> residual in R
  181. double squaredResidual = residual * residual; // residual in R -> squaredResidual >= 0.0
  182. sumOfSquaredResiduals += squaredResidual; // sumOfSquaredResiduals before >= 0.0, squaredResidual >= 0.0 -> sumOfSquaredResiduals after >= 0.0
  183. }
  184. if (out_sumsq) *out_sumsq = sumOfSquaredResiduals;
  185. integer degreesOfFreedom = x.size - 1; // x.size >= 2 -> degreesOfFreedom >= 1 -> degreesOfFreedom > 0
  186. double meanSquaredResidual = sumOfSquaredResiduals / degreesOfFreedom; // sumOfSquaredResiduals >= 0.0, degreesOfFreedom > 0 -> meanSquaredResidual >= 0.0
  187. if (out_variance) *out_variance = (double) meanSquaredResidual;
  188. if (out_stdev) {
  189. double rootMeanSquaredResidual = sqrt (meanSquaredResidual); // meanSquaredResidual >= 0.0 -> rootMeanSquaredResidual >= 0.0 (in particular, not NaN)
  190. *out_stdev = rootMeanSquaredResidual;
  191. }
  192. return;
  193. }
  194. if (Melder_debug == 49) {
  195. /*
  196. Naive implementation in longdouble.
  197. */
  198. longdouble sum = 0.0; // -> sum in R (invariant)
  199. for (integer i = 1; i <= x.size; i ++)
  200. sum += (longdouble) x [i]; // sum before in R, x [i] in R -> sum after in R
  201. if (out_sum) *out_sum = (double) sum;
  202. longdouble mean = sum / x.size; // sum in R, x.size != 0 -> mean in R
  203. if (out_mean) *out_mean = (double) mean;
  204. if (! out_sumsq && ! out_variance && ! out_stdev) return;
  205. longdouble sumOfSquaredResiduals = 0.0; // -> sumOfSquares >= 0.0 (invariant)
  206. for (integer i = 1; i <= x.size; i ++) {
  207. longdouble residual = (longdouble) x [i] - mean; // x [i] in R, mean in R -> residual in R
  208. longdouble squaredResidual = residual * residual; // residual in R -> squaredResidual >= 0.0
  209. sumOfSquaredResiduals += squaredResidual; // sumOfSquaredResiduals before >= 0.0, squaredResidual >= 0.0 -> sumOfSquaredResiduals after >= 0.0
  210. }
  211. if (out_sumsq) *out_sumsq = (double) sumOfSquaredResiduals;
  212. integer degreesOfFreedom = x.size - 1; // x.size >= 2 -> degreesOfFreedom >= 1 -> degreesOfFreedom > 0
  213. longdouble meanSquaredResidual = sumOfSquaredResiduals / degreesOfFreedom; // sumOfSquaredResiduals >= 0.0, degreesOfFreedom > 0 -> meanSquaredResidual >= 0.0
  214. if (out_variance) *out_variance = (double) meanSquaredResidual;
  215. if (out_stdev) {
  216. longdouble rootMeanSquaredResidual = sqrtl (meanSquaredResidual); // meanSquaredResidual >= 0.0 -> rootMeanSquaredResidual >= 0.0 (in particular, not NaN)
  217. *out_stdev = (double) rootMeanSquaredResidual;
  218. }
  219. return;
  220. }
  221. if (Melder_debug == 50) {
  222. double mean;
  223. NUM_sum_mean (x, out_sum, & mean);
  224. if (out_mean) *out_mean = mean;
  225. if (! out_sumsq && ! out_variance && ! out_stdev) {
  226. return;
  227. }
  228. KAHAN_SUM (longdouble, sumsq, integer, x.size,
  229. const double *px = & x [1],
  230. longdouble (*px - mean) * longdouble (*px - mean),
  231. ++ px)
  232. double variance = double (sumsq / (x.size - 1));
  233. if (out_sumsq) *out_sumsq = (double) sumsq;
  234. if (out_variance) *out_variance = variance;
  235. if (out_stdev) *out_stdev = sqrt (variance);
  236. return;
  237. }
  238. if (Melder_debug == 51) {
  239. double sum, mean;
  240. NUM_sum_mean (x, & sum, & mean);
  241. if (out_sum) *out_sum = sum;
  242. if (out_mean) *out_mean = mean;
  243. if (! out_sumsq && ! out_variance && ! out_stdev) return;
  244. double sumOfSquaredResiduals = 0.0; // -> sumOfSquares >= 0.0 (invariant)
  245. for (integer i = 1; i <= x.size; i ++) {
  246. double residual = x [i] - mean; // x [i] in R, mean in R -> residual in R
  247. double squaredResidual = residual * residual; // residual in R -> squaredResidual >= 0.0
  248. sumOfSquaredResiduals += squaredResidual; // sumOfSquaredResiduals before >= 0.0, squaredResidual >= 0.0 -> sumOfSquaredResiduals after >= 0.0
  249. }
  250. if (out_sumsq) *out_sumsq = sumOfSquaredResiduals;
  251. integer degreesOfFreedom = x.size - 1; // x.size >= 2 -> degreesOfFreedom >= 1 -> degreesOfFreedom > 0
  252. double meanSquaredResidual = sumOfSquaredResiduals / degreesOfFreedom; // sumOfSquaredResiduals >= 0.0, degreesOfFreedom > 0 -> meanSquaredResidual >= 0.0
  253. if (out_variance) *out_variance = (double) meanSquaredResidual;
  254. if (out_stdev) {
  255. double rootMeanSquaredResidual = sqrt (meanSquaredResidual); // meanSquaredResidual >= 0.0 -> rootMeanSquaredResidual >= 0.0 (in particular, not NaN)
  256. *out_stdev = rootMeanSquaredResidual;
  257. }
  258. return;
  259. }
  260. }
  261. /*
  262. Our standard: pairwise algorithm with base case 64.
  263. */
  264. PAIRWISE_SUM (longdouble, sum, integer, x.size,
  265. const double *px = & x [1],
  266. (longdouble) *px,
  267. px += 1
  268. )
  269. double mean = double (sum / x.size); // rounded to double, because this guarantees that x[i] - mean will be zero for constant x[1..size]
  270. if (out_sum) *out_sum = (double) sum;
  271. if (out_mean) *out_mean = (double) mean;
  272. if (! out_sumsq && ! out_variance && ! out_stdev) return;
  273. PAIRWISE_SUM (longdouble, sumsq, integer, x.size,
  274. const double *xx = & x [1],
  275. longdouble (*xx - mean) * longdouble (*xx - mean),
  276. xx += 1
  277. )
  278. longdouble variance = sumsq / (x.size - 1);
  279. if (out_sumsq) *out_sumsq = (double) sumsq;
  280. if (out_variance) *out_variance = (double) variance;
  281. if (out_stdev) *out_stdev = sqrt ((double) variance);
  282. }
  283. void NUM_sum_mean_sumsq_variance_stdev (const constMAT& x, integer columnNumber,
  284. double *out_sum, double *out_mean, double *out_sumsq, double *out_variance, double *out_stdev) noexcept
  285. {
  286. if (x.nrow < 2) {
  287. if (x.nrow <= 0) {
  288. if (out_sum) *out_sum = 0.0;
  289. if (out_mean) *out_mean = undefined;
  290. if (out_sumsq) *out_sumsq = undefined;
  291. } else {
  292. if (out_sum) *out_sum = x [1] [columnNumber];
  293. if (out_mean) *out_mean = x [1] [columnNumber];
  294. if (out_sumsq) *out_sumsq = 0.0;
  295. }
  296. if (out_variance) *out_variance = undefined;
  297. if (out_stdev) *out_stdev = undefined;
  298. return;
  299. }
  300. PAIRWISE_SUM (longdouble, sum, integer, x.nrow,
  301. const double *px = & x [1] [columnNumber],
  302. (longdouble) *px,
  303. px += x.ncol
  304. )
  305. double mean = double (sum / x.nrow); // rounded to double, because this guarantees that x[i] - mean will be zero for constant x[1..size]
  306. if (out_sum) *out_sum = (double) sum;
  307. if (out_mean) *out_mean = (double) mean;
  308. if (! out_sumsq && ! out_variance && ! out_stdev) return;
  309. PAIRWISE_SUM (longdouble, sumsq, integer, x.nrow,
  310. const double *px = & x [1] [columnNumber],
  311. longdouble (*px - mean) * longdouble (*px - mean),
  312. px += x.ncol
  313. )
  314. longdouble variance = sumsq / (x.nrow - 1);
  315. if (out_sumsq) *out_sumsq = (double) sumsq;
  316. if (out_variance) *out_variance = (double) variance;
  317. if (out_stdev) *out_stdev = sqrt ((double) variance);
  318. }
  319. double NUMsumsq (const constVEC& x) noexcept {
  320. double sumsq;
  321. NUM_sum_mean_sumsq_variance_stdev (x, nullptr, nullptr, & sumsq, nullptr, nullptr);
  322. return sumsq;
  323. }
  324. double NUMvariance (const constVEC& x) noexcept {
  325. double variance;
  326. NUM_sum_mean_sumsq_variance_stdev (x, nullptr, nullptr, nullptr, & variance, nullptr);
  327. return variance;
  328. }
  329. /* End of file NUM.cpp */