gsl_cblas__source_trsm_c.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. /* blas/source_trsm_c.h
  2. *
  3. * Copyright (C) 2001, 2007 Brian Gough
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 3 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. {
  20. INDEX i, j, k;
  21. INDEX n1, n2;
  22. const int nonunit = (Diag == CblasNonUnit);
  23. const int conj = (TransA == CblasConjTrans) ? -1 : 1;
  24. int side, uplo, trans;
  25. const BASE alpha_real = CONST_REAL0(alpha);
  26. const BASE alpha_imag = CONST_IMAG0(alpha);
  27. if (Order == CblasRowMajor) {
  28. n1 = M;
  29. n2 = N;
  30. side = Side;
  31. uplo = Uplo;
  32. trans = TransA;
  33. trans = (TransA == CblasNoTrans) ? CblasNoTrans : CblasTrans;
  34. } else {
  35. n1 = N;
  36. n2 = M;
  37. side = (Side == CblasLeft) ? CblasRight : CblasLeft; /* exchanged */
  38. uplo = (Uplo == CblasUpper) ? CblasLower : CblasUpper; /* exchanged */
  39. trans = (TransA == CblasNoTrans) ? CblasNoTrans : CblasTrans; /* same */
  40. }
  41. if (side == CblasLeft && uplo == CblasUpper && trans == CblasNoTrans) {
  42. /* form B := alpha * inv(TriU(A)) *B */
  43. if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
  44. for (i = 0; i < n1; i++) {
  45. for (j = 0; j < n2; j++) {
  46. const BASE Bij_real = REAL(B, ldb * i + j);
  47. const BASE Bij_imag = IMAG(B, ldb * i + j);
  48. REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
  49. IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
  50. }
  51. }
  52. }
  53. for (i = n1; i > 0 && i--;) {
  54. if (nonunit) {
  55. const BASE Aii_real = CONST_REAL(A, lda * i + i);
  56. const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i);
  57. const BASE s = xhypot(Aii_real, Aii_imag);
  58. const BASE a_real = Aii_real / s;
  59. const BASE a_imag = Aii_imag / s;
  60. for (j = 0; j < n2; j++) {
  61. const BASE Bij_real = REAL(B, ldb * i + j);
  62. const BASE Bij_imag = IMAG(B, ldb * i + j);
  63. REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
  64. IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
  65. }
  66. }
  67. for (k = 0; k < i; k++) {
  68. const BASE Aki_real = CONST_REAL(A, k * lda + i);
  69. const BASE Aki_imag = conj * CONST_IMAG(A, k * lda + i);
  70. for (j = 0; j < n2; j++) {
  71. const BASE Bij_real = REAL(B, ldb * i + j);
  72. const BASE Bij_imag = IMAG(B, ldb * i + j);
  73. REAL(B, ldb * k + j) -= Aki_real * Bij_real - Aki_imag * Bij_imag;
  74. IMAG(B, ldb * k + j) -= Aki_real * Bij_imag + Aki_imag * Bij_real;
  75. }
  76. }
  77. }
  78. } else if (side == CblasLeft && uplo == CblasUpper && trans == CblasTrans) {
  79. /* form B := alpha * inv(TriU(A))' *B */
  80. if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
  81. for (i = 0; i < n1; i++) {
  82. for (j = 0; j < n2; j++) {
  83. const BASE Bij_real = REAL(B, ldb * i + j);
  84. const BASE Bij_imag = IMAG(B, ldb * i + j);
  85. REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
  86. IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
  87. }
  88. }
  89. }
  90. for (i = 0; i < n1; i++) {
  91. if (nonunit) {
  92. const BASE Aii_real = CONST_REAL(A, lda * i + i);
  93. const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i);
  94. const BASE s = xhypot(Aii_real, Aii_imag);
  95. const BASE a_real = Aii_real / s;
  96. const BASE a_imag = Aii_imag / s;
  97. for (j = 0; j < n2; j++) {
  98. const BASE Bij_real = REAL(B, ldb * i + j);
  99. const BASE Bij_imag = IMAG(B, ldb * i + j);
  100. REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
  101. IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
  102. }
  103. }
  104. for (k = i + 1; k < n1; k++) {
  105. const BASE Aik_real = CONST_REAL(A, i * lda + k);
  106. const BASE Aik_imag = conj * CONST_IMAG(A, i * lda + k);
  107. for (j = 0; j < n2; j++) {
  108. const BASE Bij_real = REAL(B, ldb * i + j);
  109. const BASE Bij_imag = IMAG(B, ldb * i + j);
  110. REAL(B, ldb * k + j) -= Aik_real * Bij_real - Aik_imag * Bij_imag;
  111. IMAG(B, ldb * k + j) -= Aik_real * Bij_imag + Aik_imag * Bij_real;
  112. }
  113. }
  114. }
  115. } else if (side == CblasLeft && uplo == CblasLower && trans == CblasNoTrans) {
  116. /* form B := alpha * inv(TriL(A))*B */
  117. if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
  118. for (i = 0; i < n1; i++) {
  119. for (j = 0; j < n2; j++) {
  120. const BASE Bij_real = REAL(B, ldb * i + j);
  121. const BASE Bij_imag = IMAG(B, ldb * i + j);
  122. REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
  123. IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
  124. }
  125. }
  126. }
  127. for (i = 0; i < n1; i++) {
  128. if (nonunit) {
  129. const BASE Aii_real = CONST_REAL(A, lda * i + i);
  130. const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i);
  131. const BASE s = xhypot(Aii_real, Aii_imag);
  132. const BASE a_real = Aii_real / s;
  133. const BASE a_imag = Aii_imag / s;
  134. for (j = 0; j < n2; j++) {
  135. const BASE Bij_real = REAL(B, ldb * i + j);
  136. const BASE Bij_imag = IMAG(B, ldb * i + j);
  137. REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
  138. IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
  139. }
  140. }
  141. for (k = i + 1; k < n1; k++) {
  142. const BASE Aki_real = CONST_REAL(A, k * lda + i);
  143. const BASE Aki_imag = conj * CONST_IMAG(A, k * lda + i);
  144. for (j = 0; j < n2; j++) {
  145. const BASE Bij_real = REAL(B, ldb * i + j);
  146. const BASE Bij_imag = IMAG(B, ldb * i + j);
  147. REAL(B, ldb * k + j) -= Aki_real * Bij_real - Aki_imag * Bij_imag;
  148. IMAG(B, ldb * k + j) -= Aki_real * Bij_imag + Aki_imag * Bij_real;
  149. }
  150. }
  151. }
  152. } else if (side == CblasLeft && uplo == CblasLower && trans == CblasTrans) {
  153. /* form B := alpha * TriL(A)' *B */
  154. if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
  155. for (i = 0; i < n1; i++) {
  156. for (j = 0; j < n2; j++) {
  157. const BASE Bij_real = REAL(B, ldb * i + j);
  158. const BASE Bij_imag = IMAG(B, ldb * i + j);
  159. REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
  160. IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
  161. }
  162. }
  163. }
  164. for (i = n1; i > 0 && i--;) {
  165. if (nonunit) {
  166. const BASE Aii_real = CONST_REAL(A, lda * i + i);
  167. const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i);
  168. const BASE s = xhypot(Aii_real, Aii_imag);
  169. const BASE a_real = Aii_real / s;
  170. const BASE a_imag = Aii_imag / s;
  171. for (j = 0; j < n2; j++) {
  172. const BASE Bij_real = REAL(B, ldb * i + j);
  173. const BASE Bij_imag = IMAG(B, ldb * i + j);
  174. REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
  175. IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
  176. }
  177. }
  178. for (k = 0; k < i; k++) {
  179. const BASE Aik_real = CONST_REAL(A, i * lda + k);
  180. const BASE Aik_imag = conj * CONST_IMAG(A, i * lda + k);
  181. for (j = 0; j < n2; j++) {
  182. const BASE Bij_real = REAL(B, ldb * i + j);
  183. const BASE Bij_imag = IMAG(B, ldb * i + j);
  184. REAL(B, ldb * k + j) -= Aik_real * Bij_real - Aik_imag * Bij_imag;
  185. IMAG(B, ldb * k + j) -= Aik_real * Bij_imag + Aik_imag * Bij_real;
  186. }
  187. }
  188. }
  189. } else if (side == CblasRight && uplo == CblasUpper && trans == CblasNoTrans) {
  190. /* form B := alpha * B * inv(TriU(A)) */
  191. if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
  192. for (i = 0; i < n1; i++) {
  193. for (j = 0; j < n2; j++) {
  194. const BASE Bij_real = REAL(B, ldb * i + j);
  195. const BASE Bij_imag = IMAG(B, ldb * i + j);
  196. REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
  197. IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
  198. }
  199. }
  200. }
  201. for (i = 0; i < n1; i++) {
  202. for (j = 0; j < n2; j++) {
  203. if (nonunit) {
  204. const BASE Ajj_real = CONST_REAL(A, lda * j + j);
  205. const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j);
  206. const BASE s = xhypot(Ajj_real, Ajj_imag);
  207. const BASE a_real = Ajj_real / s;
  208. const BASE a_imag = Ajj_imag / s;
  209. const BASE Bij_real = REAL(B, ldb * i + j);
  210. const BASE Bij_imag = IMAG(B, ldb * i + j);
  211. REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
  212. IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
  213. }
  214. {
  215. const BASE Bij_real = REAL(B, ldb * i + j);
  216. const BASE Bij_imag = IMAG(B, ldb * i + j);
  217. for (k = j + 1; k < n2; k++) {
  218. const BASE Ajk_real = CONST_REAL(A, j * lda + k);
  219. const BASE Ajk_imag = conj * CONST_IMAG(A, j * lda + k);
  220. REAL(B, ldb * i + k) -= Ajk_real * Bij_real - Ajk_imag * Bij_imag;
  221. IMAG(B, ldb * i + k) -= Ajk_real * Bij_imag + Ajk_imag * Bij_real;
  222. }
  223. }
  224. }
  225. }
  226. } else if (side == CblasRight && uplo == CblasUpper && trans == CblasTrans) {
  227. /* form B := alpha * B * inv(TriU(A))' */
  228. if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
  229. for (i = 0; i < n1; i++) {
  230. for (j = 0; j < n2; j++) {
  231. const BASE Bij_real = REAL(B, ldb * i + j);
  232. const BASE Bij_imag = IMAG(B, ldb * i + j);
  233. REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
  234. IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
  235. }
  236. }
  237. }
  238. for (i = 0; i < n1; i++) {
  239. for (j = n2; j > 0 && j--;) {
  240. if (nonunit) {
  241. const BASE Ajj_real = CONST_REAL(A, lda * j + j);
  242. const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j);
  243. const BASE s = xhypot(Ajj_real, Ajj_imag);
  244. const BASE a_real = Ajj_real / s;
  245. const BASE a_imag = Ajj_imag / s;
  246. const BASE Bij_real = REAL(B, ldb * i + j);
  247. const BASE Bij_imag = IMAG(B, ldb * i + j);
  248. REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
  249. IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
  250. }
  251. {
  252. const BASE Bij_real = REAL(B, ldb * i + j);
  253. const BASE Bij_imag = IMAG(B, ldb * i + j);
  254. for (k = 0; k < j; k++) {
  255. const BASE Akj_real = CONST_REAL(A, k * lda + j);
  256. const BASE Akj_imag = conj * CONST_IMAG(A, k * lda + j);
  257. REAL(B, ldb * i + k) -= Akj_real * Bij_real - Akj_imag * Bij_imag;
  258. IMAG(B, ldb * i + k) -= Akj_real * Bij_imag + Akj_imag * Bij_real;
  259. }
  260. }
  261. }
  262. }
  263. } else if (side == CblasRight && uplo == CblasLower && trans == CblasNoTrans) {
  264. /* form B := alpha * B * inv(TriL(A)) */
  265. if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
  266. for (i = 0; i < n1; i++) {
  267. for (j = 0; j < n2; j++) {
  268. const BASE Bij_real = REAL(B, ldb * i + j);
  269. const BASE Bij_imag = IMAG(B, ldb * i + j);
  270. REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
  271. IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
  272. }
  273. }
  274. }
  275. for (i = 0; i < n1; i++) {
  276. for (j = n2; j > 0 && j--;) {
  277. if (nonunit) {
  278. const BASE Ajj_real = CONST_REAL(A, lda * j + j);
  279. const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j);
  280. const BASE s = xhypot(Ajj_real, Ajj_imag);
  281. const BASE a_real = Ajj_real / s;
  282. const BASE a_imag = Ajj_imag / s;
  283. const BASE Bij_real = REAL(B, ldb * i + j);
  284. const BASE Bij_imag = IMAG(B, ldb * i + j);
  285. REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
  286. IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
  287. }
  288. {
  289. const BASE Bij_real = REAL(B, ldb * i + j);
  290. const BASE Bij_imag = IMAG(B, ldb * i + j);
  291. for (k = 0; k < j; k++) {
  292. const BASE Ajk_real = CONST_REAL(A, j * lda + k);
  293. const BASE Ajk_imag = conj * CONST_IMAG(A, j * lda + k);
  294. REAL(B, ldb * i + k) -= Ajk_real * Bij_real - Ajk_imag * Bij_imag;
  295. IMAG(B, ldb * i + k) -= Ajk_real * Bij_imag + Ajk_imag * Bij_real;
  296. }
  297. }
  298. }
  299. }
  300. } else if (side == CblasRight && uplo == CblasLower && trans == CblasTrans) {
  301. /* form B := alpha * B * inv(TriL(A))' */
  302. if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
  303. for (i = 0; i < n1; i++) {
  304. for (j = 0; j < n2; j++) {
  305. const BASE Bij_real = REAL(B, ldb * i + j);
  306. const BASE Bij_imag = IMAG(B, ldb * i + j);
  307. REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
  308. IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
  309. }
  310. }
  311. }
  312. for (i = 0; i < n1; i++) {
  313. for (j = 0; j < n2; j++) {
  314. if (nonunit) {
  315. const BASE Ajj_real = CONST_REAL(A, lda * j + j);
  316. const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j);
  317. const BASE s = xhypot(Ajj_real, Ajj_imag);
  318. const BASE a_real = Ajj_real / s;
  319. const BASE a_imag = Ajj_imag / s;
  320. const BASE Bij_real = REAL(B, ldb * i + j);
  321. const BASE Bij_imag = IMAG(B, ldb * i + j);
  322. REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
  323. IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
  324. }
  325. {
  326. const BASE Bij_real = REAL(B, ldb * i + j);
  327. const BASE Bij_imag = IMAG(B, ldb * i + j);
  328. for (k = j + 1; k < n2; k++) {
  329. const BASE Akj_real = CONST_REAL(A, k * lda + j);
  330. const BASE Akj_imag = conj * CONST_IMAG(A, k * lda + j);
  331. REAL(B, ldb * i + k) -= Akj_real * Bij_real - Akj_imag * Bij_imag;
  332. IMAG(B, ldb * i + k) -= Akj_real * Bij_imag + Akj_imag * Bij_real;
  333. }
  334. }
  335. }
  336. }
  337. } else {
  338. BLAS_ERROR("unrecognized operation");
  339. }
  340. }