gsl_cblas__source_her2k.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. /* blas/source_her2k_c.h
  2. *
  3. * Copyright (C) 2001, 2007 Brian Gough
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 3 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. {
  20. INDEX i, j, k;
  21. int uplo, trans;
  22. const BASE alpha_real = CONST_REAL0(alpha);
  23. BASE alpha_imag = CONST_IMAG0(alpha);
  24. if (beta == 1.0 && ((alpha_real == 0.0 && alpha_imag == 0.0) || K == 0))
  25. return;
  26. if (Order == CblasRowMajor) {
  27. uplo = Uplo;
  28. trans = Trans;
  29. } else {
  30. uplo = (Uplo == CblasUpper) ? CblasLower : CblasUpper;
  31. trans = (Trans == CblasNoTrans) ? CblasConjTrans : CblasNoTrans;
  32. alpha_imag *= -1; /* conjugate alpha */
  33. }
  34. /* form C := beta*C */
  35. if (beta == 0.0) {
  36. if (uplo == CblasUpper) {
  37. for (i = 0; i < N; i++) {
  38. for (j = i; j < N; j++) {
  39. REAL(C, ldc * i + j) = 0.0;
  40. IMAG(C, ldc * i + j) = 0.0;
  41. }
  42. }
  43. } else {
  44. for (i = 0; i < N; i++) {
  45. for (j = 0; j <= i; j++) {
  46. REAL(C, ldc * i + j) = 0.0;
  47. IMAG(C, ldc * i + j) = 0.0;
  48. }
  49. }
  50. }
  51. } else if (beta != 1.0) {
  52. if (uplo == CblasUpper) {
  53. for (i = 0; i < N; i++) {
  54. REAL(C, ldc * i + i) *= beta;
  55. IMAG(C, ldc * i + i) = 0.0;
  56. for (j = i + 1; j < N; j++) {
  57. REAL(C, ldc * i + j) *= beta;
  58. IMAG(C, ldc * i + j) *= beta;
  59. }
  60. }
  61. } else {
  62. for (i = 0; i < N; i++) {
  63. for (j = 0; j < i; j++) {
  64. REAL(C, ldc * i + j) *= beta;
  65. IMAG(C, ldc * i + j) *= beta;
  66. }
  67. REAL(C, ldc * i + i) *= beta;
  68. IMAG(C, ldc * i + i) = 0.0;
  69. }
  70. }
  71. } else {
  72. for (i = 0; i < N; i++) {
  73. IMAG(C, ldc * i + i) = 0.0;
  74. }
  75. }
  76. if (alpha_real == 0.0 && alpha_imag == 0.0)
  77. return;
  78. if (uplo == CblasUpper && trans == CblasNoTrans) {
  79. for (i = 0; i < N; i++) {
  80. /* Cii += alpha Aik conj(Bik) + conj(alpha) Bik conj(Aik) */
  81. {
  82. BASE temp_real = 0.0;
  83. /* BASE temp_imag = 0.0; */
  84. for (k = 0; k < K; k++) {
  85. const BASE Aik_real = CONST_REAL(A, i * lda + k);
  86. const BASE Aik_imag = CONST_IMAG(A, i * lda + k);
  87. /* temp1 = alpha * Aik */
  88. const BASE temp1_real = alpha_real * Aik_real - alpha_imag * Aik_imag;
  89. const BASE temp1_imag = alpha_real * Aik_imag + alpha_imag * Aik_real;
  90. const BASE Bik_real = CONST_REAL(B, i * ldb + k);
  91. const BASE Bik_imag = CONST_IMAG(B, i * ldb + k);
  92. temp_real += temp1_real * Bik_real + temp1_imag * Bik_imag;
  93. }
  94. REAL(C, i * ldc + i) += 2 * temp_real;
  95. IMAG(C, i * ldc + i) = 0.0;
  96. }
  97. /* Cij += alpha Aik conj(Bjk) + conj(alpha) Bik conj(Ajk) */
  98. for (j = i + 1; j < N; j++) {
  99. BASE temp_real = 0.0;
  100. BASE temp_imag = 0.0;
  101. for (k = 0; k < K; k++) {
  102. const BASE Aik_real = CONST_REAL(A, i * lda + k);
  103. const BASE Aik_imag = CONST_IMAG(A, i * lda + k);
  104. /* temp1 = alpha * Aik */
  105. const BASE temp1_real = alpha_real * Aik_real - alpha_imag * Aik_imag;
  106. const BASE temp1_imag = alpha_real * Aik_imag + alpha_imag * Aik_real;
  107. const BASE Bik_real = CONST_REAL(B, i * ldb + k);
  108. const BASE Bik_imag = CONST_IMAG(B, i * ldb + k);
  109. const BASE Ajk_real = CONST_REAL(A, j * lda + k);
  110. const BASE Ajk_imag = CONST_IMAG(A, j * lda + k);
  111. /* temp2 = alpha * Ajk */
  112. const BASE temp2_real = alpha_real * Ajk_real - alpha_imag * Ajk_imag;
  113. const BASE temp2_imag = alpha_real * Ajk_imag + alpha_imag * Ajk_real;
  114. const BASE Bjk_real = CONST_REAL(B, j * ldb + k);
  115. const BASE Bjk_imag = CONST_IMAG(B, j * ldb + k);
  116. /* Cij += alpha * Aik * conj(Bjk) + conj(alpha) * Bik * conj(Ajk) */
  117. temp_real += ((temp1_real * Bjk_real + temp1_imag * Bjk_imag)
  118. + (Bik_real * temp2_real + Bik_imag * temp2_imag));
  119. temp_imag += ((temp1_real * (-Bjk_imag) + temp1_imag * Bjk_real)
  120. + (Bik_real * (-temp2_imag) + Bik_imag * temp2_real));
  121. }
  122. REAL(C, i * ldc + j) += temp_real;
  123. IMAG(C, i * ldc + j) += temp_imag;
  124. }
  125. }
  126. } else if (uplo == CblasUpper && trans == CblasConjTrans) {
  127. for (k = 0; k < K; k++) {
  128. for (i = 0; i < N; i++) {
  129. BASE Aki_real = CONST_REAL(A, k * lda + i);
  130. BASE Aki_imag = CONST_IMAG(A, k * lda + i);
  131. BASE Bki_real = CONST_REAL(B, k * ldb + i);
  132. BASE Bki_imag = CONST_IMAG(B, k * ldb + i);
  133. /* temp1 = alpha * conj(Aki) */
  134. BASE temp1_real = alpha_real * Aki_real - alpha_imag * (-Aki_imag);
  135. BASE temp1_imag = alpha_real * (-Aki_imag) + alpha_imag * Aki_real;
  136. /* temp2 = conj(alpha) * conj(Bki) */
  137. BASE temp2_real = alpha_real * Bki_real - alpha_imag * Bki_imag;
  138. BASE temp2_imag = -(alpha_real * Bki_imag + alpha_imag * Bki_real);
  139. /* Cii += alpha * conj(Aki) * Bki + conj(alpha) * conj(Bki) * Aki */
  140. {
  141. REAL(C, i * lda + i) += 2 * (temp1_real * Bki_real - temp1_imag * Bki_imag);
  142. IMAG(C, i * lda + i) = 0.0;
  143. }
  144. for (j = i + 1; j < N; j++) {
  145. BASE Akj_real = CONST_REAL(A, k * lda + j);
  146. BASE Akj_imag = CONST_IMAG(A, k * lda + j);
  147. BASE Bkj_real = CONST_REAL(B, k * ldb + j);
  148. BASE Bkj_imag = CONST_IMAG(B, k * ldb + j);
  149. /* Cij += alpha * conj(Aki) * Bkj + conj(alpha) * conj(Bki) * Akj */
  150. REAL(C, i * lda + j) += (temp1_real * Bkj_real - temp1_imag * Bkj_imag)
  151. + (temp2_real * Akj_real - temp2_imag * Akj_imag);
  152. IMAG(C, i * lda + j) += (temp1_real * Bkj_imag + temp1_imag * Bkj_real)
  153. + (temp2_real * Akj_imag + temp2_imag * Akj_real);
  154. }
  155. }
  156. }
  157. } else if (uplo == CblasLower && trans == CblasNoTrans) {
  158. for (i = 0; i < N; i++) {
  159. /* Cij += alpha Aik conj(Bjk) + conj(alpha) Bik conj(Ajk) */
  160. for (j = 0; j < i; j++) {
  161. BASE temp_real = 0.0;
  162. BASE temp_imag = 0.0;
  163. for (k = 0; k < K; k++) {
  164. const BASE Aik_real = CONST_REAL(A, i * lda + k);
  165. const BASE Aik_imag = CONST_IMAG(A, i * lda + k);
  166. /* temp1 = alpha * Aik */
  167. const BASE temp1_real = alpha_real * Aik_real - alpha_imag * Aik_imag;
  168. const BASE temp1_imag = alpha_real * Aik_imag + alpha_imag * Aik_real;
  169. const BASE Bik_real = CONST_REAL(B, i * ldb + k);
  170. const BASE Bik_imag = CONST_IMAG(B, i * ldb + k);
  171. const BASE Ajk_real = CONST_REAL(A, j * lda + k);
  172. const BASE Ajk_imag = CONST_IMAG(A, j * lda + k);
  173. /* temp2 = alpha * Ajk */
  174. const BASE temp2_real = alpha_real * Ajk_real - alpha_imag * Ajk_imag;
  175. const BASE temp2_imag = alpha_real * Ajk_imag + alpha_imag * Ajk_real;
  176. const BASE Bjk_real = CONST_REAL(B, j * ldb + k);
  177. const BASE Bjk_imag = CONST_IMAG(B, j * ldb + k);
  178. /* Cij += alpha * Aik * conj(Bjk) + conj(alpha) * Bik * conj(Ajk) */
  179. temp_real += ((temp1_real * Bjk_real + temp1_imag * Bjk_imag)
  180. + (Bik_real * temp2_real + Bik_imag * temp2_imag));
  181. temp_imag += ((temp1_real * (-Bjk_imag) + temp1_imag * Bjk_real)
  182. + (Bik_real * (-temp2_imag) + Bik_imag * temp2_real));
  183. }
  184. REAL(C, i * ldc + j) += temp_real;
  185. IMAG(C, i * ldc + j) += temp_imag;
  186. }
  187. /* Cii += alpha Aik conj(Bik) + conj(alpha) Bik conj(Aik) */
  188. {
  189. BASE temp_real = 0.0;
  190. /* BASE temp_imag = 0.0; */
  191. for (k = 0; k < K; k++) {
  192. const BASE Aik_real = CONST_REAL(A, i * lda + k);
  193. const BASE Aik_imag = CONST_IMAG(A, i * lda + k);
  194. /* temp1 = alpha * Aik */
  195. const BASE temp1_real = alpha_real * Aik_real - alpha_imag * Aik_imag;
  196. const BASE temp1_imag = alpha_real * Aik_imag + alpha_imag * Aik_real;
  197. const BASE Bik_real = CONST_REAL(B, i * ldb + k);
  198. const BASE Bik_imag = CONST_IMAG(B, i * ldb + k);
  199. temp_real += temp1_real * Bik_real + temp1_imag * Bik_imag;
  200. }
  201. REAL(C, i * ldc + i) += 2 * temp_real;
  202. IMAG(C, i * ldc + i) = 0.0;
  203. }
  204. }
  205. } else if (uplo == CblasLower && trans == CblasConjTrans) {
  206. for (k = 0; k < K; k++) {
  207. for (i = 0; i < N; i++) {
  208. BASE Aki_real = CONST_REAL(A, k * lda + i);
  209. BASE Aki_imag = CONST_IMAG(A, k * lda + i);
  210. BASE Bki_real = CONST_REAL(B, k * ldb + i);
  211. BASE Bki_imag = CONST_IMAG(B, k * ldb + i);
  212. /* temp1 = alpha * conj(Aki) */
  213. BASE temp1_real = alpha_real * Aki_real - alpha_imag * (-Aki_imag);
  214. BASE temp1_imag = alpha_real * (-Aki_imag) + alpha_imag * Aki_real;
  215. /* temp2 = conj(alpha) * conj(Bki) */
  216. BASE temp2_real = alpha_real * Bki_real - alpha_imag * Bki_imag;
  217. BASE temp2_imag = -(alpha_real * Bki_imag + alpha_imag * Bki_real);
  218. for (j = 0; j < i; j++) {
  219. BASE Akj_real = CONST_REAL(A, k * lda + j);
  220. BASE Akj_imag = CONST_IMAG(A, k * lda + j);
  221. BASE Bkj_real = CONST_REAL(B, k * ldb + j);
  222. BASE Bkj_imag = CONST_IMAG(B, k * ldb + j);
  223. /* Cij += alpha * conj(Aki) * Bkj + conj(alpha) * conj(Bki) * Akj */
  224. REAL(C, i * lda + j) += (temp1_real * Bkj_real - temp1_imag * Bkj_imag)
  225. + (temp2_real * Akj_real - temp2_imag * Akj_imag);
  226. IMAG(C, i * lda + j) += (temp1_real * Bkj_imag + temp1_imag * Bkj_real)
  227. + (temp2_real * Akj_imag + temp2_imag * Akj_real);
  228. }
  229. /* Cii += alpha * conj(Aki) * Bki + conj(alpha) * conj(Bki) * Aki */
  230. {
  231. REAL(C, i * lda + i) += 2 * (temp1_real * Bki_real - temp1_imag * Bki_imag);
  232. IMAG(C, i * lda + i) = 0.0;
  233. }
  234. }
  235. }
  236. } else {
  237. BLAS_ERROR("unrecognized operation");
  238. }
  239. }