gsl_cblas__source_trsv_c.h 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. /* blas/source_trsv_c.h
  2. *
  3. * Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 3 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. {
  20. const int conj = (TransA == CblasConjTrans) ? -1 : 1;
  21. const int Trans = (TransA != CblasConjTrans) ? TransA : CblasTrans;
  22. const int nonunit = (Diag == CblasNonUnit);
  23. INDEX i, j;
  24. INDEX ix, jx;
  25. if (N == 0)
  26. return;
  27. /* form x := inv( A )*x */
  28. if ((order == CblasRowMajor && Trans == CblasNoTrans && Uplo == CblasUpper)
  29. || (order == CblasColMajor && Trans == CblasTrans && Uplo == CblasLower)) {
  30. ix = OFFSET(N, incX) + incX * (N - 1);
  31. if (nonunit) {
  32. const BASE a_real = CONST_REAL(A, lda * (N - 1) + (N - 1));
  33. const BASE a_imag = conj * CONST_IMAG(A, lda * (N - 1) + (N - 1));
  34. const BASE x_real = REAL(X, ix);
  35. const BASE x_imag = IMAG(X, ix);
  36. const BASE s = xhypot(a_real, a_imag);
  37. const BASE b_real = a_real / s;
  38. const BASE b_imag = a_imag / s;
  39. REAL(X, ix) = (x_real * b_real + x_imag * b_imag) / s;
  40. IMAG(X, ix) = (x_imag * b_real - b_imag * x_real) / s;
  41. }
  42. ix -= incX;
  43. for (i = N - 1; i > 0 && i--;) {
  44. BASE tmp_real = REAL(X, ix);
  45. BASE tmp_imag = IMAG(X, ix);
  46. jx = ix + incX;
  47. for (j = i + 1; j < N; j++) {
  48. const BASE Aij_real = CONST_REAL(A, lda * i + j);
  49. const BASE Aij_imag = conj * CONST_IMAG(A, lda * i + j);
  50. const BASE x_real = REAL(X, jx);
  51. const BASE x_imag = IMAG(X, jx);
  52. tmp_real -= Aij_real * x_real - Aij_imag * x_imag;
  53. tmp_imag -= Aij_real * x_imag + Aij_imag * x_real;
  54. jx += incX;
  55. }
  56. if (nonunit) {
  57. const BASE a_real = CONST_REAL(A, lda * i + i);
  58. const BASE a_imag = conj * CONST_IMAG(A, lda * i + i);
  59. const BASE s = xhypot(a_real, a_imag);
  60. const BASE b_real = a_real / s;
  61. const BASE b_imag = a_imag / s;
  62. REAL(X, ix) = (tmp_real * b_real + tmp_imag * b_imag) / s;
  63. IMAG(X, ix) = (tmp_imag * b_real - tmp_real * b_imag) / s;
  64. } else {
  65. REAL(X, ix) = tmp_real;
  66. IMAG(X, ix) = tmp_imag;
  67. }
  68. ix -= incX;
  69. }
  70. } else if ((order == CblasRowMajor && Trans == CblasNoTrans && Uplo == CblasLower)
  71. || (order == CblasColMajor && Trans == CblasTrans && Uplo == CblasUpper)) {
  72. /* forward substitution */
  73. ix = OFFSET(N, incX);
  74. if (nonunit) {
  75. const BASE a_real = CONST_REAL(A, lda * 0 + 0);
  76. const BASE a_imag = conj * CONST_IMAG(A, lda * 0 + 0);
  77. const BASE x_real = REAL(X, ix);
  78. const BASE x_imag = IMAG(X, ix);
  79. const BASE s = xhypot(a_real, a_imag);
  80. const BASE b_real = a_real / s;
  81. const BASE b_imag = a_imag / s;
  82. REAL(X, ix) = (x_real * b_real + x_imag * b_imag) / s;
  83. IMAG(X, ix) = (x_imag * b_real - b_imag * x_real) / s;
  84. }
  85. ix += incX;
  86. for (i = 1; i < N; i++) {
  87. BASE tmp_real = REAL(X, ix);
  88. BASE tmp_imag = IMAG(X, ix);
  89. jx = OFFSET(N, incX);
  90. for (j = 0; j < i; j++) {
  91. const BASE Aij_real = CONST_REAL(A, lda * i + j);
  92. const BASE Aij_imag = conj * CONST_IMAG(A, lda * i + j);
  93. const BASE x_real = REAL(X, jx);
  94. const BASE x_imag = IMAG(X, jx);
  95. tmp_real -= Aij_real * x_real - Aij_imag * x_imag;
  96. tmp_imag -= Aij_real * x_imag + Aij_imag * x_real;
  97. jx += incX;
  98. }
  99. if (nonunit) {
  100. const BASE a_real = CONST_REAL(A, lda * i + i);
  101. const BASE a_imag = conj * CONST_IMAG(A, lda * i + i);
  102. const BASE s = xhypot(a_real, a_imag);
  103. const BASE b_real = a_real / s;
  104. const BASE b_imag = a_imag / s;
  105. REAL(X, ix) = (tmp_real * b_real + tmp_imag * b_imag) / s;
  106. IMAG(X, ix) = (tmp_imag * b_real - tmp_real * b_imag) / s;
  107. } else {
  108. REAL(X, ix) = tmp_real;
  109. IMAG(X, ix) = tmp_imag;
  110. }
  111. ix += incX;
  112. }
  113. } else if ((order == CblasRowMajor && Trans == CblasTrans && Uplo == CblasUpper)
  114. || (order == CblasColMajor && Trans == CblasNoTrans && Uplo == CblasLower)) {
  115. /* form x := inv( A' )*x */
  116. /* forward substitution */
  117. ix = OFFSET(N, incX);
  118. if (nonunit) {
  119. const BASE a_real = CONST_REAL(A, lda * 0 + 0);
  120. const BASE a_imag = conj * CONST_IMAG(A, lda * 0 + 0);
  121. const BASE x_real = REAL(X, ix);
  122. const BASE x_imag = IMAG(X, ix);
  123. const BASE s = xhypot(a_real, a_imag);
  124. const BASE b_real = a_real / s;
  125. const BASE b_imag = a_imag / s;
  126. REAL(X, ix) = (x_real * b_real + x_imag * b_imag) / s;
  127. IMAG(X, ix) = (x_imag * b_real - b_imag * x_real) / s;
  128. }
  129. ix += incX;
  130. for (i = 1; i < N; i++) {
  131. BASE tmp_real = REAL(X, ix);
  132. BASE tmp_imag = IMAG(X, ix);
  133. jx = OFFSET(N, incX);
  134. for (j = 0; j < i; j++) {
  135. const BASE Aij_real = CONST_REAL(A, lda * j + i);
  136. const BASE Aij_imag = conj * CONST_IMAG(A, lda * j + i);
  137. const BASE x_real = REAL(X, jx);
  138. const BASE x_imag = IMAG(X, jx);
  139. tmp_real -= Aij_real * x_real - Aij_imag * x_imag;
  140. tmp_imag -= Aij_real * x_imag + Aij_imag * x_real;
  141. jx += incX;
  142. }
  143. if (nonunit) {
  144. const BASE a_real = CONST_REAL(A, lda * i + i);
  145. const BASE a_imag = conj * CONST_IMAG(A, lda * i + i);
  146. const BASE s = xhypot(a_real, a_imag);
  147. const BASE b_real = a_real / s;
  148. const BASE b_imag = a_imag / s;
  149. REAL(X, ix) = (tmp_real * b_real + tmp_imag * b_imag) / s;
  150. IMAG(X, ix) = (tmp_imag * b_real - tmp_real * b_imag) / s;
  151. } else {
  152. REAL(X, ix) = tmp_real;
  153. IMAG(X, ix) = tmp_imag;
  154. }
  155. ix += incX;
  156. }
  157. } else if ((order == CblasRowMajor && Trans == CblasTrans && Uplo == CblasLower)
  158. || (order == CblasColMajor && Trans == CblasNoTrans && Uplo == CblasUpper)) {
  159. /* backsubstitution */
  160. ix = OFFSET(N, incX) + incX * (N - 1);
  161. if (nonunit) {
  162. const BASE a_real = CONST_REAL(A, lda * (N - 1) + (N - 1));
  163. const BASE a_imag = conj * CONST_IMAG(A, lda * (N - 1) + (N - 1));
  164. const BASE x_real = REAL(X, ix);
  165. const BASE x_imag = IMAG(X, ix);
  166. const BASE s = xhypot(a_real, a_imag);
  167. const BASE b_real = a_real / s;
  168. const BASE b_imag = a_imag / s;
  169. REAL(X, ix) = (x_real * b_real + x_imag * b_imag) / s;
  170. IMAG(X, ix) = (x_imag * b_real - b_imag * x_real) / s;
  171. }
  172. ix -= incX;
  173. for (i = N - 1; i > 0 && i--;) {
  174. BASE tmp_real = REAL(X, ix);
  175. BASE tmp_imag = IMAG(X, ix);
  176. jx = ix + incX;
  177. for (j = i + 1; j < N; j++) {
  178. const BASE Aij_real = CONST_REAL(A, lda * j + i);
  179. const BASE Aij_imag = conj * CONST_IMAG(A, lda * j + i);
  180. const BASE x_real = REAL(X, jx);
  181. const BASE x_imag = IMAG(X, jx);
  182. tmp_real -= Aij_real * x_real - Aij_imag * x_imag;
  183. tmp_imag -= Aij_real * x_imag + Aij_imag * x_real;
  184. jx += incX;
  185. }
  186. if (nonunit) {
  187. const BASE a_real = CONST_REAL(A, lda * i + i);
  188. const BASE a_imag = conj * CONST_IMAG(A, lda * i + i);
  189. const BASE s = xhypot(a_real, a_imag);
  190. const BASE b_real = a_real / s;
  191. const BASE b_imag = a_imag / s;
  192. REAL(X, ix) = (tmp_real * b_real + tmp_imag * b_imag) / s;
  193. IMAG(X, ix) = (tmp_imag * b_real - tmp_real * b_imag) / s;
  194. } else {
  195. REAL(X, ix) = tmp_real;
  196. IMAG(X, ix) = tmp_imag;
  197. }
  198. ix -= incX;
  199. }
  200. } else {
  201. BLAS_ERROR("unrecognized operation");
  202. }
  203. }