gsl_cblas__source_hemm.h 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. /* blas/source_hemm.h
  2. *
  3. * Copyright (C) 2001, 2007 Brian Gough
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 3 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. {
  20. INDEX i, j, k;
  21. INDEX n1, n2;
  22. int uplo, side;
  23. const BASE alpha_real = CONST_REAL0(alpha);
  24. const BASE alpha_imag = CONST_IMAG0(alpha);
  25. const BASE beta_real = CONST_REAL0(beta);
  26. const BASE beta_imag = CONST_IMAG0(beta);
  27. if ((alpha_real == 0.0 && alpha_imag == 0.0)
  28. && (beta_real == 1.0 && beta_imag == 0.0))
  29. return;
  30. if (Order == CblasRowMajor) {
  31. n1 = M;
  32. n2 = N;
  33. uplo = Uplo;
  34. side = Side;
  35. } else {
  36. n1 = N;
  37. n2 = M;
  38. uplo = (Uplo == CblasUpper) ? CblasLower : CblasUpper;
  39. side = (Side == CblasLeft) ? CblasRight : CblasLeft;
  40. }
  41. /* form y := beta*y */
  42. if (beta_real == 0.0 && beta_imag == 0.0) {
  43. for (i = 0; i < n1; i++) {
  44. for (j = 0; j < n2; j++) {
  45. REAL(C, ldc * i + j) = 0.0;
  46. IMAG(C, ldc * i + j) = 0.0;
  47. }
  48. }
  49. } else if (!(beta_real == 1.0 && beta_imag == 0.0)) {
  50. for (i = 0; i < n1; i++) {
  51. for (j = 0; j < n2; j++) {
  52. const BASE Cij_real = REAL(C, ldc * i + j);
  53. const BASE Cij_imag = IMAG(C, ldc * i + j);
  54. REAL(C, ldc * i + j) = beta_real * Cij_real - beta_imag * Cij_imag;
  55. IMAG(C, ldc * i + j) = beta_real * Cij_imag + beta_imag * Cij_real;
  56. }
  57. }
  58. }
  59. if (alpha_real == 0.0 && alpha_imag == 0.0)
  60. return;
  61. if (side == CblasLeft && uplo == CblasUpper) {
  62. /* form C := alpha*A*B + C */
  63. for (i = 0; i < n1; i++) {
  64. for (j = 0; j < n2; j++) {
  65. const BASE Bij_real = CONST_REAL(B, ldb * i + j);
  66. const BASE Bij_imag = CONST_IMAG(B, ldb * i + j);
  67. const BASE temp1_real = alpha_real * Bij_real - alpha_imag * Bij_imag;
  68. const BASE temp1_imag = alpha_real * Bij_imag + alpha_imag * Bij_real;
  69. BASE temp2_real = 0.0;
  70. BASE temp2_imag = 0.0;
  71. {
  72. const BASE Aii_real = CONST_REAL(A, i * lda + i);
  73. /* const BASE Aii_imag = 0.0; */
  74. REAL(C, i * ldc + j) += temp1_real * Aii_real;
  75. IMAG(C, i * ldc + j) += temp1_imag * Aii_real;
  76. }
  77. for (k = i + 1; k < n1; k++) {
  78. const BASE Aik_real = CONST_REAL(A, i * lda + k);
  79. const BASE Aik_imag = CONST_IMAG(A, i * lda + k);
  80. const BASE Bkj_real = CONST_REAL(B, ldb * k + j);
  81. const BASE Bkj_imag = CONST_IMAG(B, ldb * k + j);
  82. REAL(C, k * ldc + j) += Aik_real * temp1_real - (-Aik_imag) * temp1_imag;
  83. IMAG(C, k * ldc + j) += Aik_real * temp1_imag + (-Aik_imag) * temp1_real;
  84. temp2_real += Aik_real * Bkj_real - Aik_imag * Bkj_imag;
  85. temp2_imag += Aik_real * Bkj_imag + Aik_imag * Bkj_real;
  86. }
  87. REAL(C, i * ldc + j) += alpha_real * temp2_real - alpha_imag * temp2_imag;
  88. IMAG(C, i * ldc + j) += alpha_real * temp2_imag + alpha_imag * temp2_real;
  89. }
  90. }
  91. } else if (side == CblasLeft && uplo == CblasLower) {
  92. /* form C := alpha*A*B + C */
  93. for (i = 0; i < n1; i++) {
  94. for (j = 0; j < n2; j++) {
  95. const BASE Bij_real = CONST_REAL(B, ldb * i + j);
  96. const BASE Bij_imag = CONST_IMAG(B, ldb * i + j);
  97. const BASE temp1_real = alpha_real * Bij_real - alpha_imag * Bij_imag;
  98. const BASE temp1_imag = alpha_real * Bij_imag + alpha_imag * Bij_real;
  99. BASE temp2_real = 0.0;
  100. BASE temp2_imag = 0.0;
  101. for (k = 0; k < i; k++) {
  102. const BASE Aik_real = CONST_REAL(A, i * lda + k);
  103. const BASE Aik_imag = CONST_IMAG(A, i * lda + k);
  104. const BASE Bkj_real = CONST_REAL(B, ldb * k + j);
  105. const BASE Bkj_imag = CONST_IMAG(B, ldb * k + j);
  106. REAL(C, k * ldc + j) += Aik_real * temp1_real - (-Aik_imag) * temp1_imag;
  107. IMAG(C, k * ldc + j) += Aik_real * temp1_imag + (-Aik_imag) * temp1_real;
  108. temp2_real += Aik_real * Bkj_real - Aik_imag * Bkj_imag;
  109. temp2_imag += Aik_real * Bkj_imag + Aik_imag * Bkj_real;
  110. }
  111. {
  112. const BASE Aii_real = CONST_REAL(A, i * lda + i);
  113. /* const BASE Aii_imag = 0.0; */
  114. REAL(C, i * ldc + j) += temp1_real * Aii_real;
  115. IMAG(C, i * ldc + j) += temp1_imag * Aii_real;
  116. }
  117. REAL(C, i * ldc + j) += alpha_real * temp2_real - alpha_imag * temp2_imag;
  118. IMAG(C, i * ldc + j) += alpha_real * temp2_imag + alpha_imag * temp2_real;
  119. }
  120. }
  121. } else if (side == CblasRight && uplo == CblasUpper) {
  122. /* form C := alpha*B*A + C */
  123. for (i = 0; i < n1; i++) {
  124. for (j = 0; j < n2; j++) {
  125. const BASE Bij_real = CONST_REAL(B, ldb * i + j);
  126. const BASE Bij_imag = CONST_IMAG(B, ldb * i + j);
  127. const BASE temp1_real = alpha_real * Bij_real - alpha_imag * Bij_imag;
  128. const BASE temp1_imag = alpha_real * Bij_imag + alpha_imag * Bij_real;
  129. BASE temp2_real = 0.0;
  130. BASE temp2_imag = 0.0;
  131. {
  132. const BASE Ajj_real = CONST_REAL(A, j * lda + j);
  133. /* const BASE Ajj_imag = 0.0; */
  134. REAL(C, i * ldc + j) += temp1_real * Ajj_real;
  135. IMAG(C, i * ldc + j) += temp1_imag * Ajj_real;
  136. }
  137. for (k = j + 1; k < n2; k++) {
  138. const BASE Ajk_real = CONST_REAL(A, j * lda + k);
  139. const BASE Ajk_imag = CONST_IMAG(A, j * lda + k);
  140. const BASE Bik_real = CONST_REAL(B, ldb * i + k);
  141. const BASE Bik_imag = CONST_IMAG(B, ldb * i + k);
  142. REAL(C, i * ldc + k) += temp1_real * Ajk_real - temp1_imag * Ajk_imag;
  143. IMAG(C, i * ldc + k) += temp1_real * Ajk_imag + temp1_imag * Ajk_real;
  144. temp2_real += Bik_real * Ajk_real - Bik_imag * (-Ajk_imag);
  145. temp2_imag += Bik_real * (-Ajk_imag) + Bik_imag * Ajk_real;
  146. }
  147. REAL(C, i * ldc + j) += alpha_real * temp2_real - alpha_imag * temp2_imag;
  148. IMAG(C, i * ldc + j) += alpha_real * temp2_imag + alpha_imag * temp2_real;
  149. }
  150. }
  151. } else if (side == CblasRight && uplo == CblasLower) {
  152. /* form C := alpha*B*A + C */
  153. for (i = 0; i < n1; i++) {
  154. for (j = 0; j < n2; j++) {
  155. const BASE Bij_real = CONST_REAL(B, ldb * i + j);
  156. const BASE Bij_imag = CONST_IMAG(B, ldb * i + j);
  157. const BASE temp1_real = alpha_real * Bij_real - alpha_imag * Bij_imag;
  158. const BASE temp1_imag = alpha_real * Bij_imag + alpha_imag * Bij_real;
  159. BASE temp2_real = 0.0;
  160. BASE temp2_imag = 0.0;
  161. for (k = 0; k < j; k++) {
  162. const BASE Ajk_real = CONST_REAL(A, j * lda + k);
  163. const BASE Ajk_imag = CONST_IMAG(A, j * lda + k);
  164. const BASE Bik_real = CONST_REAL(B, ldb * i + k);
  165. const BASE Bik_imag = CONST_IMAG(B, ldb * i + k);
  166. REAL(C, i * ldc + k) += temp1_real * Ajk_real - temp1_imag * Ajk_imag;
  167. IMAG(C, i * ldc + k) += temp1_real * Ajk_imag + temp1_imag * Ajk_real;
  168. temp2_real += Bik_real * Ajk_real - Bik_imag * (-Ajk_imag);
  169. temp2_imag += Bik_real * (-Ajk_imag) + Bik_imag * Ajk_real;
  170. }
  171. {
  172. const BASE Ajj_real = CONST_REAL(A, j * lda + j);
  173. /* const BASE Ajj_imag = 0.0; */
  174. REAL(C, i * ldc + j) += temp1_real * Ajj_real;
  175. IMAG(C, i * ldc + j) += temp1_imag * Ajj_real;
  176. }
  177. REAL(C, i * ldc + j) += alpha_real * temp2_real - alpha_imag * temp2_imag;
  178. IMAG(C, i * ldc + j) += alpha_real * temp2_imag + alpha_imag * temp2_real;
  179. }
  180. }
  181. } else {
  182. BLAS_ERROR("unrecognized operation");
  183. }
  184. }