solver_opt.c 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. /*
  2. * Tema 2 ASC
  3. * 2020 Spring
  4. */
  5. #include "utils.h"
  6. /*
  7. * Add your optimized implementation here
  8. */
  9. double* my_solver(int N, double *A, double *B)
  10. {
  11. int i, j, k;
  12. double *BAt, *AB, *C;
  13. register double *ptr1, *ptr2, *ptr3, *ptr4;
  14. register double sum;
  15. register int index;
  16. BAt = (double *) malloc(N * N * sizeof(double));
  17. AB = (double *) malloc(N * N * sizeof(double));
  18. C = (double *) malloc(N * N * sizeof(double));
  19. for (i = 0; i < N; i ++) {
  20. index = i * N;
  21. ptr3 = &(BAt[index]);
  22. ptr4 = &(AB[index]);
  23. for (j = 0; j < N; j++) {
  24. sum = 0.0;
  25. ptr1 = &A[(j * N) + j];
  26. ptr2 = &B[(index) + j];
  27. /* B * (A transpus) */
  28. for (k = j; k < N; k++) {
  29. /* sum += A[(j * N) + k] * B[(i * N) + k]; */
  30. sum += *ptr1 * *ptr2;
  31. ptr1++;
  32. ptr2++;
  33. }
  34. *ptr3 = sum;
  35. /* BAt[(i * N) + j] = sum; */
  36. sum = 0.0;
  37. ptr1 = &A[(index) + i];
  38. ptr2 = &B[(index) + j];
  39. /* A * B */
  40. for (k = i; k < N; k++) {
  41. sum += *ptr1 * *ptr2;
  42. /* sum += A[(i * N) + k] * B[(k * N) + j]; */
  43. ptr1++;
  44. ptr2 += N;
  45. }
  46. /* AB[(i * N) + j] = sum; */
  47. *ptr4 = sum;
  48. ptr3++;
  49. ptr4++;
  50. }
  51. }
  52. for (i = 0; i < N; i++) {
  53. index = i * N;
  54. ptr3 = &(C[index]);
  55. ptr4 = &(BAt[index]);
  56. for (j = 0; j < N; j++) {
  57. sum = 0.0;
  58. ptr1 = &(A[(i * N) + i]);
  59. ptr2 = &(AB[(i * N) + j]);
  60. for (k = i; k < N; k++) {
  61. /* sum += A[(i * N) + k] * AB[(k * N) + j]; */
  62. sum += *ptr1 * *ptr2;
  63. ptr1++;
  64. ptr2 += N;
  65. }
  66. /* C[(i * N) + j] = BAt[(i * N) + j] + sum; */
  67. *ptr3 = *ptr4 + sum;
  68. ptr3++;
  69. ptr4++;
  70. }
  71. }
  72. free(BAt);
  73. free(AB);
  74. return C;
  75. }