  // (c) Daniel Llorens - 2015, 2017
  // This library is free software; you can redistribute it and/or modify it under
  // the terms of the GNU Lesser General Public License as published by the Free
  // Software Foundation; either version 3 of the License, or (at your option) any
  // later version.
  /// @file bench-from.C
  /// @brief Benchmark for ra:: selection ops.
  8. #include <iostream>
  9. #include <iomanip>
  10. #include <string>
  11. #include "ra/test.H"
  12. #include "ra/complex.H"
  13. #include "ra/format.H"
  14. #include "ra/big.H"
  15. #include "ra/wrank.H"
  16. #include "ra/operators.H"
  17. #include "ra/io.H"
  18. #include "ra/bench.H"
  // Stream names used unqualified by the benchmark code below.
  using std::cout;
  using std::endl;
  using std::flush;

  // Element type of the data arrays being benchmarked (index arrays use int).
  using real = double;
  21. int main()
  22. {
  23. TestRecorder tr(cout);
  24. cout.precision(4);
  25. tr.section("rank1(rank1)");
  26. {
  27. auto rank1_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  28. {
  29. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  30. using Array1 = std::decay_t<decltype(A_)>;
  31. Array1 A = ra::iota(Asize);
  32. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  33. Array1 B({Isize}, 0);
  34. auto II = I.data();
  35. auto AA = A.data();
  36. auto BB = B.data();
  37. Benchmark bm { N, 3 };
  38. auto report = [&](std::string const & tag, auto && bv)
  39. {
  40. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  41. .test_eq(ra::iota(Isize)*Istep, B);
  42. };
  43. report("indexing on raw pointers",
  44. bm.run([&]()
  45. {
  46. for (int i=0; i<Isize; ++i) {
  47. BB[i] = AA[II[i]];
  48. }
  49. }));
  50. report("vectorized selection",
  51. bm.run([&]()
  52. {
  53. B = A(I);
  54. }));
  55. report("write out the indexing loop",
  56. bm.run([&]()
  57. {
  58. for_each([&A](auto & b, auto i) { b = A(i); }, B, I);
  59. }));
  60. report("loop on scalar selection",
  61. bm.run([&]()
  62. {
  63. for (int i=0; i<Isize; ++i) {
  64. B(i) = A(I(i));
  65. }
  66. }));
  67. };
  68. tr.section("fixed rank");
  69. rank1_test(ra::Unique<real, 1>(), 10000, 500, 20, 10000);
  70. rank1_test(ra::Unique<real, 1>(), 1000, 50, 20, 10*10000);
  71. rank1_test(ra::Unique<real, 1>(), 100, 5, 20, 100*10000);
  72. rank1_test(ra::Unique<real, 1>(), 10000, 500, 2, 10000);
  73. rank1_test(ra::Unique<real, 1>(), 1000, 50, 2, 10*10000);
  74. rank1_test(ra::Unique<real, 1>(), 100, 5, 2, 100*10000);
  75. tr.section("var rank");
  76. rank1_test(ra::Unique<real>(), 10000, 500, 20, 10000);
  77. rank1_test(ra::Unique<real>(), 1000, 50, 20, 10*10000);
  78. rank1_test(ra::Unique<real>(), 100, 5, 20, 100*10000);
  79. rank1_test(ra::Unique<real>(), 10000, 500, 2, 10000);
  80. rank1_test(ra::Unique<real>(), 1000, 50, 2, 10*10000);
  81. rank1_test(ra::Unique<real>(), 100, 5, 2, 100*10000);
  82. }
  83. tr.section("rank2(rank1, rank1)");
  84. {
  85. auto rank1_11_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  86. {
  87. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  88. using Array2 = std::decay_t<decltype(A_)>;
  89. Array2 A({Asize, Asize}, ra::_0 + ra::_1);
  90. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  91. Array2 B({Isize, Isize}, 0);
  92. auto II = I.data();
  93. auto AA = A.data();
  94. auto BB = B.data();
  95. Benchmark bm { N, 3 };
  96. auto report = [&](std::string const &tag, auto && bv)
  97. {
  98. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  99. .test_eq(Istep*(ra::_0 + ra::_1), B);
  100. };
  101. report("2D indexing on raw pointers",
  102. bm.run([&]()
  103. {
  104. for (int i=0; i<Isize; ++i) {
  105. for (int j=0; j<Isize; ++j) {
  106. BB[i*Isize + j] = AA[II[i]*Asize + II[j]];
  107. }
  108. }
  109. }));
  110. report("vectorized selection",
  111. bm.run([&]()
  112. {
  113. B = A(I, I);
  114. }));
  115. };
  116. tr.section("fixed rank");
  117. rank1_11_test(ra::Unique<real, 2>(), 1000, 50, 20, 10000);
  118. rank1_11_test(ra::Unique<real, 2>(), 100, 5, 20, 10*10*10000);
  119. rank1_11_test(ra::Unique<real, 2>(), 1000, 50, 2, 10000);
  120. rank1_11_test(ra::Unique<real, 2>(), 100, 5, 2, 10*10*10000);
  121. rank1_11_test(ra::Unique<real, 2>(), 10, 5, 2, 10*10*10000);
  122. tr.section("var rank");
  123. rank1_11_test(ra::Unique<real>(), 1000, 50, 20, 10000);
  124. rank1_11_test(ra::Unique<real>(), 100, 5, 20, 10*10*10000);
  125. rank1_11_test(ra::Unique<real>(), 1000, 50, 2, 10000);
  126. rank1_11_test(ra::Unique<real>(), 100, 5, 2, 10*10*10000);
  127. rank1_11_test(ra::Unique<real>(), 10, 5, 2, 10*10*10000);
  128. }
  129. tr.section("rank3(rank1, rank1, rank1)");
  130. {
  131. auto rank1_111_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  132. {
  133. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  134. using Array3 = std::decay_t<decltype(A_)>;
  135. Array3 A({Asize, Asize, Asize}, 10000*ra::_0 + 100*ra::_1 + 1*ra::_2);
  136. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  137. Array3 B({Isize, Isize, Isize}, 0);
  138. auto II = I.data();
  139. auto AA = A.data();
  140. auto BB = B.data();
  141. Benchmark bm { N, 3 };
  142. auto report = [&](std::string const &tag, auto && bv)
  143. {
  144. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  145. .test_eq(Istep*(10000*ra::_0 + 100*ra::_1 + 1*ra::_2), B);
  146. };
  147. report("3D indexing on raw pointers",
  148. bm.run([&]()
  149. {
  150. for (int i=0; i<Isize; ++i) {
  151. for (int j=0; j<Isize; ++j) {
  152. for (int k=0; k<Isize; ++k) {
  153. BB[k+Isize*(j+Isize*i)] = AA[II[k]+Asize*(II[j]+Asize*II[i])];
  154. }
  155. }
  156. }
  157. }));
  158. report("vectorized selection",
  159. bm.run([&]()
  160. {
  161. B = A(I, I, I);
  162. }));
  163. };
  164. tr.section("fixed rank");
  165. rank1_111_test(ra::Unique<real, 3>(), 40, 20, 2, 4000);
  166. rank1_111_test(ra::Unique<real, 3>(), 100, 5, 20, 4*4*4*4000);
  167. rank1_111_test(ra::Unique<real, 3>(), 10, 5, 2, 4*4*4*4000);
  168. }
  169. tr.section("rank4(rank1, rank1, rank1, rank1)");
  170. {
  171. auto rank1_1111_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  172. {
  173. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  174. using Array4 = std::decay_t<decltype(A_)>;
  175. ra::Unique<real, 4> A(ra::Small<int, 4>(Asize), 1000000*ra::_0 + 10000*ra::_1 + 100*ra::_2 + 1*ra::_3);
  176. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  177. Array4 B(ra::Small<int, 4>(Isize), 0);
  178. auto II = I.data();
  179. auto AA = A.data();
  180. auto BB = B.data();
  181. Benchmark bm { N, 3 };
  182. auto report = [&](std::string const &tag, auto && bv)
  183. {
  184. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  185. .test_eq(Istep*(1000000*ra::_0 + 10000*ra::_1 + 100*ra::_2 + 1*ra::_3), B);
  186. };
  187. report("3D indexing on raw pointers",
  188. bm.run([&]()
  189. {
  190. for (int i=0; i<Isize; ++i) {
  191. for (int j=0; j<Isize; ++j) {
  192. for (int k=0; k<Isize; ++k) {
  193. for (int l=0; l<Isize; ++l) {
  194. BB[l+Isize*(k+Isize*(j+Isize*i))] = AA[II[l]+Asize*(II[k]+Asize*(II[j]+Asize*II[i]))];
  195. }
  196. }
  197. }
  198. }
  199. }));
  200. report("vectorized selection",
  201. bm.run([&]()
  202. {
  203. B = A(I, I, I, I);
  204. }));
  205. report("slice one axis at a time", // TODO one way A(i, i, i, i) could work
  206. bm.run([&]()
  207. {
  208. for (int i=0; i<Isize; ++i) {
  209. for (int j=0; j<Isize; ++j) {
  210. for (int k=0; k<Isize; ++k) {
  211. B(i, j, k) = A(I[i], I[j], I[k])(I);
  212. }
  213. }
  214. }
  215. }));
  216. };
  217. tr.section("fixed rank");
  218. rank1_1111_test(ra::Unique<real, 4>(), 40, 20, 2, 200);
  219. rank1_1111_test(ra::Unique<real, 4>(), 10, 5, 2, 4*4*4*4*200);
  220. }
  221. return tr.summary();
  222. }