laplace3d.cc 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. // -*- mode: c++; coding: utf-8 -*-
  2. /// @file laplace3d.cc
  3. /// @brief Solve Poisson equation in a parallelepiped, with linear elements.
  4. // (c) Daniel Llorens - 2017
  5. // This library is free software; you can redistribute it and/or modify it under
  6. // the terms of the GNU Lesser General Public License as published by the Free
  7. // Software Foundation; either version 3 of the License, or (at your option) any
  8. // later version.
  9. // Adapted from lsolver/laplace3d.cc by Christian Badura, 1998/05.
  10. // This benchmarks fairly close to the Cish original. We spend comparatively
  11. // less time in cghs than in laplace2d.cc, so not using BLAS doesn't stand out as
  12. // much as it does there.
  13. #include "ra/ra.hh"
  14. #include "ra/test.hh"
  15. #include "examples/cghs.hh"
  16. #include "ra/bench.hh"
  17. using std::cout, std::endl, std::flush, ra::PI, ra::TestRecorder;
  18. Benchmark::clock::duration tmul(0);
  19. double const d23=2./3., d16=-1./6., d00=0.;
  20. double const d827=8./27., d427=4./27., d227=2./27., d127=1./27.;
  21. double const SM[8][8] = { {d23, d00, d16, d00, d00, d16, d16, d16},
  22. {d00, d23, d00, d16, d16, d00, d16, d16},
  23. {d16, d00, d23, d00, d16, d16, d00, d16},
  24. {d00, d16, d00, d23, d16, d16, d16, d00},
  25. {d00, d16, d16, d16, d23, d00, d16, d00},
  26. {d16, d00, d16, d16, d00, d23, d00, d16},
  27. {d16, d16, d00, d16, d16, d00, d23, d00},
  28. {d16, d16, d16, d00, d00, d16, d00, d23} };
  29. double const MM[8][8] = { {d827, d427, d227, d427, d427, d227, d127, d227},
  30. {d427, d827, d427, d227, d227, d427, d227, d127},
  31. {d227, d427, d827, d427, d127, d227, d427, d227},
  32. {d427, d227, d427, d827, d227, d127, d227, d427},
  33. {d427, d227, d127, d227, d827, d427, d227, d427},
  34. {d227, d427, d227, d127, d427, d827, d427, d227},
  35. {d127, d227, d427, d227, d227, d427, d827, d427},
  36. {d227, d127, d227, d427, d427, d227, d427, d827} };
  37. int const ISF[8][3] = { {0, 0, 0}, {1, 0, 0}, {1, 1, 0}, {0, 1, 0},
  38. {0, 0, 1}, {1, 0, 1}, {1, 1, 1}, {0, 1, 1} };
  39. struct Element
  40. {
  41. int v[8]; // indices of 8 vertices.
  42. double reg1[8], reg2[8]; // temporary storage.
  43. };
  44. struct Matrix0bnd
  45. {
  46. ra::Small<double, 8, 8> M; // element matrix.
  47. double h; // scale factor.
  48. ra::Big<Element, 1> & E; // elements.
  49. ra::Big<int, 1> & B; // indices of boundary elements.
  50. };
  51. template <class A, class V, class W>
  52. void gemv(A const & a, V const & v, W & w)
  53. {
  54. auto pa = a.data();
  55. for (int i=0; i<a.size(0); ++i) {
  56. double tmp=0.;
  57. for (int j=0; j<a.size(1); ++j) {
  58. tmp += pa[j*a.size(1)+i]*v[j];
  59. // tmp += a(i, j)*v[j]; // FIXME is a 1/3 slower; maybe what makes ra::gemv slower overall
  60. }
  61. w[i] = tmp;
  62. }
  63. }
  64. template <class V, class W>
  65. void mult(Matrix0bnd & A, V const & v, W & w)
  66. {
  67. auto t0 = Benchmark::clock::now();
  68. w = 0.;
  69. for_each([&](auto && E)
  70. {
  71. ra::start(E.reg1) = v(E.v); // vector -> element
  72. gemv(A.M, E.reg1, E.reg2); // element matrix
  73. // ra::start(E.reg2) = ra::gemv(A.M, E.reg1); // FIXME somewhat slower
  74. w(E.v) += E.reg2; // element -> vector
  75. },
  76. A.E);
  77. w(A.B) = 0; // set boundary
  78. w *= A.h; // scale factor
  79. tmul += Benchmark::clock::now()-t0;
  80. }
  81. struct StiffMatrix: Matrix0bnd
  82. {
  83. StiffMatrix(double h, ra::Big<Element, 1> & E, ra::Big<int, 1> & B)
  84. : Matrix0bnd { SM, h, E, B } {};
  85. };
  86. struct MassMatrix: Matrix0bnd
  87. {
  88. MassMatrix(double h, ra::Big<Element, 1> & E, ra::Big<int, 1> & B)
  89. : Matrix0bnd { MM, h*h*h, E, B } {};
  90. };
  91. inline int pos(int n, int i, int j, int k)
  92. {
  93. return n*(n*i + j) + k;
  94. }
  95. // CosCosCosSolution, CosCosCos in [-1, 1]^3
  96. double g(double x, double y, double z)
  97. {
  98. return cos(.5*PI*x)*cos(.5*PI*y)*cos(.5*PI*z);
  99. }
  100. double f(double x, double y, double z)
  101. {
  102. return .25*3.*PI*PI*cos(.5*PI*x)*cos(.5*PI*y)*cos(.5*PI*z);
  103. }
  104. int main()
  105. {
  106. TestRecorder tr(std::cout);
  107. auto t0 = Benchmark::clock::now();
  108. int n = 50;
  109. double EPS = 1e-5;
  110. double h = 1./n; // space = ±1, so grid step = 2h
  111. ra::Big<ra::Small<double, 3>, 1> V({(n+1)*(n+1)*(n+1)}, ra::none); // vertex coordinates.
  112. ra::Big<int, 1> B({6*(n+1)*(n+1)}, ra::none); // indices of boundary vertices.
  113. ra::Big<Element, 1> E({n*n*n}, ra::none); // the elements.
  114. // set vertex coordinates.
  115. auto ip = ra::iota(n+1);
  116. ply(from([&](auto i, auto j, auto k)
  117. { V[pos(n+1, i, j, k)] = { -1.+2.*i*h, -1.+2.*j*h, -1.+2.*k*h }; },
  118. ip, ip, ip));
  119. // set elements.
  120. auto i = ra::iota(n);
  121. ply(from([&](auto i, auto j, auto k, auto v)
  122. { E(pos(n, i, j, k)).v[v] = pos(n+1, i+ISF[v][0], j+ISF[v][1], k+ISF[v][2]); },
  123. i, i, i, ra::iota(8)));
  124. // set boundaries (points on edges and corners multiple times --doesn't matter).
  125. int k = 0;
  126. for (int i=0; i<=n; ++i) {
  127. for (int j=0; j<=n; ++j) {
  128. B[k++] = pos(n+1, 0, i, j);
  129. B[k++] = pos(n+1, n, i, j);
  130. B[k++] = pos(n+1, i, 0, j);
  131. B[k++] = pos(n+1, i, n, j);
  132. B[k++] = pos(n+1, i, j, 0);
  133. B[k++] = pos(n+1, i, j, n);
  134. }
  135. }
  136. ra::Big<double, 1> b({(n+1)*(n+1)*(n+1)}, 0.); // right hand side
  137. ra::Big<double, 1> x({(n+1)*(n+1)*(n+1)}, 0.); // solution vector
  138. // set right side.
  139. ra::Big<double, 1> aux = map([](auto && Vi) { return f(Vi[0], Vi[1], Vi[2]); }, V);
  140. // integral ~ product with mass matrix.
  141. MassMatrix MM(h, E, B);
  142. mult(MM, aux, b);
  143. // solve.
  144. StiffMatrix SM(h, E, B);
  145. ra::Big<double, 2> work({3, (n+1)*(n+1)*(n+1)}, ra::none);
  146. auto t1 = Benchmark::clock::now();
  147. int its = cghs(SM, b, x, work, EPS);
  148. auto tsolve = Benchmark::clock::now()-t1;
  149. // compare with exact solution.
  150. ra::Big<double, 1> c = map([](auto && Vi) { return g(Vi[0], Vi[1], Vi[2]); }, V);
  151. double err = amax(abs(c-x));
  152. auto ttot = Benchmark::clock::now()-t0;
  153. cout << "total time " << Benchmark::toseconds(ttot)/1e-3 << " " << "ms" << endl;
  154. cout << "solve time " << Benchmark::toseconds(tsolve)/1e-3 << " " << "ms" << endl;
  155. cout << "mul time " << Benchmark::toseconds(tmul)/1e-3 << " " << "ms" << endl;
  156. tr.info("err ", err).test_le(err, 0.00033);
  157. tr.info("its ", its).test_le(its, 1);
  158. return tr.summary();
  159. }