laplace3d.cc 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. // -*- mode: c++; coding: utf-8 -*-
  2. // ra-ra/examples - Solve Poisson equation in a parallelepiped, with linear elements.
  3. // (c) Daniel Llorens - 2017
  4. // This library is free software; you can redistribute it and/or modify it under
  5. // the terms of the GNU Lesser General Public License as published by the Free
  6. // Software Foundation; either version 3 of the License, or (at your option) any
  7. // later version.
  8. // Adapted from lsolver/laplace3d.cc by Christian Badura, 1998/05.
  9. // This benchmarks fairly close to the Cish original. We spend comparatively
  10. // less time in cghs than in laplace2d.cc, so not using BLAS doesn't stand out as
  11. // much as it does there.
  12. #include "ra/ra.hh"
  13. #include "ra/test.hh"
  14. #include "examples/cghs.hh"
  15. #include "ra/bench.hh"
  16. #include <numbers>
  17. using std::cout, std::endl, std::flush, ra::TestRecorder;
  18. constexpr auto PI = std::numbers::pi_v<double>;
  19. Benchmark::clock::duration tmul(0);
  20. double const d23=2./3., d16=-1./6., d00=0.;
  21. double const d827=8./27., d427=4./27., d227=2./27., d127=1./27.;
  22. double const SM[8][8] = { {d23, d00, d16, d00, d00, d16, d16, d16},
  23. {d00, d23, d00, d16, d16, d00, d16, d16},
  24. {d16, d00, d23, d00, d16, d16, d00, d16},
  25. {d00, d16, d00, d23, d16, d16, d16, d00},
  26. {d00, d16, d16, d16, d23, d00, d16, d00},
  27. {d16, d00, d16, d16, d00, d23, d00, d16},
  28. {d16, d16, d00, d16, d16, d00, d23, d00},
  29. {d16, d16, d16, d00, d00, d16, d00, d23} };
  30. double const MM[8][8] = { {d827, d427, d227, d427, d427, d227, d127, d227},
  31. {d427, d827, d427, d227, d227, d427, d227, d127},
  32. {d227, d427, d827, d427, d127, d227, d427, d227},
  33. {d427, d227, d427, d827, d227, d127, d227, d427},
  34. {d427, d227, d127, d227, d827, d427, d227, d427},
  35. {d227, d427, d227, d127, d427, d827, d427, d227},
  36. {d127, d227, d427, d227, d227, d427, d827, d427},
  37. {d227, d127, d227, d427, d427, d227, d427, d827} };
  38. int const ISF[8][3] = { {0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0},
  39. {0, 0, 1}, {1, 0, 1}, {1, 1, 1}, {0, 1, 1} };
  40. struct Element
  41. {
  42. int v[8]; // indices of 8 vertices.
  43. double reg1[8], reg2[8]; // temporary storage.
  44. };
  45. struct Matrix0bnd
  46. {
  47. ra::Small<double, 8, 8> M; // element matrix.
  48. double h; // scale factor.
  49. ra::Big<Element, 1> & E; // elements.
  50. ra::Big<int, 1> & B; // indices of boundary elements.
  51. };
  52. template <class A, class V, class W>
  53. void gemv(A const & a, V const & v, W & w)
  54. {
  55. auto pa = a.data();
  56. for (int i=0; i<a.len(0); ++i) {
  57. double tmp=0.;
  58. for (int j=0; j<a.len(1); ++j) {
  59. tmp += pa[j*a.len(1)+i]*v[j];
  60. // tmp += a(i, j)*v[j]; // FIXME is a 1/3 slower; maybe what makes ra::gemv slower overall
  61. }
  62. w[i] = tmp;
  63. }
  64. }
  65. template <class V, class W>
  66. void mult(Matrix0bnd & A, V const & v, W & w)
  67. {
  68. auto t0 = Benchmark::clock::now();
  69. w = 0.;
  70. for_each([&](auto && E)
  71. {
  72. ra::start(E.reg1) = v(E.v); // vector -> element
  73. gemv(A.M, E.reg1, E.reg2); // element matrix
  74. // ra::start(E.reg2) = ra::gemv(A.M, E.reg1); // FIXME somewhat slower
  75. w(E.v) += E.reg2; // element -> vector
  76. },
  77. A.E);
  78. w(A.B) = 0; // set boundary
  79. w *= A.h; // scale factor
  80. tmul += Benchmark::clock::now()-t0;
  81. }
  82. struct StiffMatrix: Matrix0bnd
  83. {
  84. StiffMatrix(double h, ra::Big<Element, 1> & E, ra::Big<int, 1> & B)
  85. : Matrix0bnd { SM, h, E, B } {};
  86. };
  87. struct MassMatrix: Matrix0bnd
  88. {
  89. MassMatrix(double h, ra::Big<Element, 1> & E, ra::Big<int, 1> & B)
  90. : Matrix0bnd { MM, h*h*h, E, B } {};
  91. };
  92. inline int pos(int n, int i, int j, int k)
  93. {
  94. return n*(n*i + j) + k;
  95. }
  96. // CosCosCosSolution, CosCosCos in [-1, 1]^3
  97. double g(double x, double y, double z)
  98. {
  99. return cos(.5*PI*x)*cos(.5*PI*y)*cos(.5*PI*z);
  100. }
  101. double f(double x, double y, double z)
  102. {
  103. return .25*3.*PI*PI*cos(.5*PI*x)*cos(.5*PI*y)*cos(.5*PI*z);
  104. }
  105. int main()
  106. {
  107. TestRecorder tr(std::cout);
  108. auto t0 = Benchmark::clock::now();
  109. int n = 50;
  110. double EPS = 1e-5;
  111. double h = 1./n; // space = ±1, so grid step = 2h
  112. ra::Big<ra::Small<double, 3>, 1> V({(n+1)*(n+1)*(n+1)}, ra::none); // vertex coordinates.
  113. ra::Big<int, 1> B({6*(n+1)*(n+1)}, ra::none); // indices of boundary vertices.
  114. ra::Big<Element, 1> E({n*n*n}, ra::none); // the elements.
  115. // set vertex coordinates.
  116. auto ip = ra::iota(n+1);
  117. ply(from([&](auto i, auto j, auto k)
  118. { V[pos(n+1, i, j, k)] = { -1.+2.*i*h, -1.+2.*j*h, -1.+2.*k*h }; },
  119. ip, ip, ip));
  120. // set elements.
  121. auto i = ra::iota(n);
  122. ply(from([&](auto i, auto j, auto k, auto v)
  123. { E(pos(n, i, j, k)).v[v] = pos(n+1, i+ISF[v][0], j+ISF[v][1], k+ISF[v][2]); },
  124. i, i, i, ra::iota(8)));
  125. // set boundaries (points on edges and corners multiple times --doesn't matter).
  126. int k = 0;
  127. for (int i=0; i<=n; ++i) {
  128. for (int j=0; j<=n; ++j) {
  129. B[k++] = pos(n+1, 0, i, j);
  130. B[k++] = pos(n+1, n, i, j);
  131. B[k++] = pos(n+1, i, 0, j);
  132. B[k++] = pos(n+1, i, n, j);
  133. B[k++] = pos(n+1, i, j, 0);
  134. B[k++] = pos(n+1, i, j, n);
  135. }
  136. }
  137. ra::Big<double, 1> b({(n+1)*(n+1)*(n+1)}, 0.); // right hand side
  138. ra::Big<double, 1> x({(n+1)*(n+1)*(n+1)}, 0.); // solution vector
  139. // set right side.
  140. ra::Big<double, 1> aux = map([](auto && Vi) { return f(Vi[0], Vi[1], Vi[2]); }, V);
  141. // integral ~ product with mass matrix.
  142. MassMatrix MM(h, E, B);
  143. mult(MM, aux, b);
  144. // solve.
  145. StiffMatrix SM(h, E, B);
  146. ra::Big<double, 2> work({3, (n+1)*(n+1)*(n+1)}, ra::none);
  147. auto t1 = Benchmark::clock::now();
  148. int its = cghs(SM, b, x, work, EPS);
  149. auto tsolve = Benchmark::clock::now()-t1;
  150. // compare with exact solution.
  151. ra::Big<double, 1> c = map([](auto && Vi) { return g(Vi[0], Vi[1], Vi[2]); }, V);
  152. double err = amax(abs(c-x));
  153. auto ttot = Benchmark::clock::now()-t0;
  154. cout << "total time " << Benchmark::toseconds(ttot)/1e-3 << " " << "ms" << endl;
  155. cout << "solve time " << Benchmark::toseconds(tsolve)/1e-3 << " " << "ms" << endl;
  156. cout << "mul time " << Benchmark::toseconds(tmul)/1e-3 << " " << "ms" << endl;
  157. tr.info("err ", err).test_le(err, 0.00033);
  158. tr.info("its ", its).test_le(its, 1);
  159. return tr.summary();
  160. }