VEC.cpp 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. /* VEC.cpp
  2. *
  3. * Copyright (C) 2017,2018 Paul Boersma
  4. *
  5. * This code is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This code is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. * See the GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this work. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #include "melder.h"
  19. #include "../dwsys/NUM2.h" /* for NUMsort2 */
  20. #ifdef macintosh
  21. #include "macport_on.h"
  22. #include <Accelerate/Accelerate.h>
  23. #include "macport_off.h"
  24. #endif
  25. #if defined (macintosh)
  26. void VECadd_macfast_ (const VEC& target, const constVEC& x, const constVEC& y) noexcept {
  27. integer n = target.size;
  28. vDSP_vaddD (& x [1], 1, & y [1], 1, & target [1], 1, integer_to_uinteger (n));
  29. /*
  30. Speed if always vDSP_vaddD:
  31. 9.3,1.26,0.21, 0.10,0.42,0.70, 1.17,1.97,5.32
  32. 20: 0.71 30: 0.54 40: 0.40 50: 0.30 60: 0.30 70: 0.26 80: 0.23 90: 0.23
  33. 1200: 0.10 1300: 0.12 1400: 0.16 1500: 0.27 1600: 0.33 1700: 0.40
  34. Speed if always explicit loop:
  35. 1.7,0.40,0.23, 0.22,0.37,0.64, 1.11,1.93,5.11
  36. 20: 0.26 30: 0.29 40: 0.24 50: 0.26 60: 0.24 70: 0.25 80: 0.24 90: 0.24
  37. 1200: 0.22 1300: 0.22 1400: 0.22 1500: 0.28 1600: 0.33 1700: 0.35
  38. Combined:
  39. 2.4,0.58,0.20, 0.10,0.42,0.68, 1.11,1.93,5.02
  40. 20: 0.30 30: 0.29 40: 0.24 50: 0.26 60: 0.23 70: 0.26 80: 0.23 90: 0.23
  41. 1200: 0.10 1300: 0.12 1400: 0.16 1500: 0.29 1600: 0.34 1700: 0.37
  42. */
  43. }
  44. #endif
  45. void VECmul_preallocated (const VEC& target, const constVEC& vec, const constMAT& mat) noexcept {
  46. Melder_assert (mat.nrow == vec.size);
  47. Melder_assert (target.size == mat.ncol);
  48. if ((true)) {
  49. for (integer icol = 1; icol <= mat.ncol; icol ++) {
  50. target [icol] = 0.0;
  51. for (integer irow = 1; irow <= mat.nrow; irow ++)
  52. target [icol] += vec [irow] * mat [irow] [icol];
  53. }
  54. } else if ((false)) {
  55. for (integer icol = 1; icol <= mat.ncol; icol ++) {
  56. PAIRWISE_SUM (longdouble, sum, integer, vec.size,
  57. const double *px = & vec [1];
  58. const double *py = & mat [1] [icol],
  59. (longdouble) *px * (longdouble) *py,
  60. (px += 1, py += mat.ncol) // this goes way beyond the confines of mat
  61. )
  62. target [icol] = double (sum);
  63. }
  64. }
  65. }
  66. autoVEC VECmul (const constVEC& vec, const constMAT& mat) noexcept {
  67. autoVEC result = VECraw (mat.ncol);
  68. VECmul_preallocated (result.get(), vec, mat);
  69. return result;
  70. }
  71. void VECmul_preallocated (const VEC& target, const constMAT& mat, const constVEC& vec) noexcept {
  72. Melder_assert (vec.size == mat.ncol);
  73. Melder_assert (target.size == mat.nrow);
  74. for (integer i = 1; i <= mat.nrow; i ++) {
  75. if ((false)) {
  76. target [i] = 0.0;
  77. for (integer j = 1; j <= vec.size; j ++)
  78. target [i] += mat [i] [j] * vec [j];
  79. } else {
  80. target [i] = NUMinner (constVEC (& mat [i] [1] - 1, mat.ncol), vec);
  81. }
  82. }
  83. }
  84. autoVEC VECmul (const constMAT& mat, const constVEC& vec) noexcept {
  85. autoVEC result = VECraw (mat.nrow);
  86. VECmul_preallocated (result.get(), mat, vec);
  87. return result;
  88. }
  89. /* End of file VEC.cpp */