bench-from.cc 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. // (c) Daniel Llorens - 2015, 2017
  2. // This library is free software; you can redistribute it and/or modify it under
  3. // the terms of the GNU Lesser General Public License as published by the Free
  4. // Software Foundation; either version 3 of the License, or (at your option) any
  5. // later version.
  6. /// @file bench-from.cc
  7. /// @brief Benchmark for ra:: selection ops.
  8. #include <iostream>
  9. #include <iomanip>
  10. #include <string>
  11. #include "ra/test.hh"
  12. #include "ra/complex.hh"
  13. #include "ra/ra.hh"
  14. #include "ra/bench.hh"
  15. using std::cout, std::endl, std::flush, ra::TestRecorder;
  16. using real = double;
  17. int main()
  18. {
  19. TestRecorder tr(cout);
  20. cout.precision(4);
  21. tr.section("rank1(rank1)");
  22. {
  23. auto rank1_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  24. {
  25. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  26. using Array1 = std::decay_t<decltype(A_)>;
  27. Array1 A = ra::iota(Asize);
  28. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  29. Array1 B({Isize}, 0);
  30. auto II = I.data();
  31. auto AA = A.data();
  32. auto BB = B.data();
  33. Benchmark bm { N, 3 };
  34. auto report = [&](std::string const & tag, auto && bv)
  35. {
  36. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  37. .test_eq(ra::iota(Isize)*Istep, B);
  38. };
  39. report("indexing on raw pointers",
  40. bm.run([&]()
  41. {
  42. for (int i=0; i<Isize; ++i) {
  43. BB[i] = AA[II[i]];
  44. }
  45. }));
  46. report("vectorized selection",
  47. bm.run([&]()
  48. {
  49. B = A(I);
  50. }));
  51. report("write out the indexing loop",
  52. bm.run([&]()
  53. {
  54. for_each([&A](auto & b, auto i) { b = A(i); }, B, I);
  55. }));
  56. report("loop on scalar selection",
  57. bm.run([&]()
  58. {
  59. for (int i=0; i<Isize; ++i) {
  60. B(i) = A(I(i));
  61. }
  62. }));
  63. };
  64. tr.section("fixed rank");
  65. rank1_test(ra::Unique<real, 1>(), 10000, 500, 20, 10000);
  66. rank1_test(ra::Unique<real, 1>(), 1000, 50, 20, 10*10000);
  67. rank1_test(ra::Unique<real, 1>(), 100, 5, 20, 100*10000);
  68. rank1_test(ra::Unique<real, 1>(), 10000, 500, 2, 10000);
  69. rank1_test(ra::Unique<real, 1>(), 1000, 50, 2, 10*10000);
  70. rank1_test(ra::Unique<real, 1>(), 100, 5, 2, 100*10000);
  71. tr.section("var rank");
  72. rank1_test(ra::Unique<real>(), 10000, 500, 20, 10000);
  73. rank1_test(ra::Unique<real>(), 1000, 50, 20, 10*10000);
  74. rank1_test(ra::Unique<real>(), 100, 5, 20, 100*10000);
  75. rank1_test(ra::Unique<real>(), 10000, 500, 2, 10000);
  76. rank1_test(ra::Unique<real>(), 1000, 50, 2, 10*10000);
  77. rank1_test(ra::Unique<real>(), 100, 5, 2, 100*10000);
  78. }
  79. tr.section("rank2(rank1, rank1)");
  80. {
  81. auto rank1_11_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  82. {
  83. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  84. using Array2 = std::decay_t<decltype(A_)>;
  85. Array2 A({Asize, Asize}, ra::_0 + ra::_1);
  86. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  87. Array2 B({Isize, Isize}, 0);
  88. auto II = I.data();
  89. auto AA = A.data();
  90. auto BB = B.data();
  91. Benchmark bm { N, 3 };
  92. auto report = [&](std::string const &tag, auto && bv)
  93. {
  94. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  95. .test_eq(Istep*(ra::_0 + ra::_1), B);
  96. };
  97. report("2D indexing on raw pointers",
  98. bm.run([&]()
  99. {
  100. for (int i=0; i<Isize; ++i) {
  101. for (int j=0; j<Isize; ++j) {
  102. BB[i*Isize + j] = AA[II[i]*Asize + II[j]];
  103. }
  104. }
  105. }));
  106. report("vectorized selection",
  107. bm.run([&]()
  108. {
  109. B = A(I, I);
  110. }));
  111. };
  112. tr.section("fixed rank");
  113. rank1_11_test(ra::Unique<real, 2>(), 1000, 50, 20, 10000);
  114. rank1_11_test(ra::Unique<real, 2>(), 100, 5, 20, 10*10*10000);
  115. rank1_11_test(ra::Unique<real, 2>(), 1000, 50, 2, 10000);
  116. rank1_11_test(ra::Unique<real, 2>(), 100, 5, 2, 10*10*10000);
  117. rank1_11_test(ra::Unique<real, 2>(), 10, 5, 2, 10*10*10000);
  118. tr.section("var rank");
  119. rank1_11_test(ra::Unique<real>(), 1000, 50, 20, 10000);
  120. rank1_11_test(ra::Unique<real>(), 100, 5, 20, 10*10*10000);
  121. rank1_11_test(ra::Unique<real>(), 1000, 50, 2, 10000);
  122. rank1_11_test(ra::Unique<real>(), 100, 5, 2, 10*10*10000);
  123. rank1_11_test(ra::Unique<real>(), 10, 5, 2, 10*10*10000);
  124. }
  125. tr.section("rank3(rank1, rank1, rank1)");
  126. {
  127. auto rank1_111_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  128. {
  129. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  130. using Array3 = std::decay_t<decltype(A_)>;
  131. Array3 A({Asize, Asize, Asize}, 10000*ra::_0 + 100*ra::_1 + 1*ra::_2);
  132. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  133. Array3 B({Isize, Isize, Isize}, 0);
  134. auto II = I.data();
  135. auto AA = A.data();
  136. auto BB = B.data();
  137. Benchmark bm { N, 3 };
  138. auto report = [&](std::string const &tag, auto && bv)
  139. {
  140. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  141. .test_eq(Istep*(10000*ra::_0 + 100*ra::_1 + 1*ra::_2), B);
  142. };
  143. report("3D indexing on raw pointers",
  144. bm.run([&]()
  145. {
  146. for (int i=0; i<Isize; ++i) {
  147. for (int j=0; j<Isize; ++j) {
  148. for (int k=0; k<Isize; ++k) {
  149. BB[k+Isize*(j+Isize*i)] = AA[II[k]+Asize*(II[j]+Asize*II[i])];
  150. }
  151. }
  152. }
  153. }));
  154. report("vectorized selection",
  155. bm.run([&]()
  156. {
  157. B = A(I, I, I);
  158. }));
  159. };
  160. tr.section("fixed rank");
  161. rank1_111_test(ra::Unique<real, 3>(), 40, 20, 2, 4000);
  162. rank1_111_test(ra::Unique<real, 3>(), 100, 5, 20, 4*4*4*4000);
  163. rank1_111_test(ra::Unique<real, 3>(), 10, 5, 2, 4*4*4*4000);
  164. }
  165. tr.section("rank4(rank1, rank1, rank1, rank1)");
  166. {
  167. auto rank1_1111_test = [&tr](auto A_, int Asize, int Isize, int Istep, int N)
  168. {
  169. cout << "select " << Isize << " step " << Istep << " from " << Asize << endl;
  170. using Array4 = std::decay_t<decltype(A_)>;
  171. ra::Unique<real, 4> A(ra::Small<int, 4>(Asize), 1000000*ra::_0 + 10000*ra::_1 + 100*ra::_2 + 1*ra::_3);
  172. ra::Unique<int, 1> I = ra::iota(Isize)*Istep;
  173. Array4 B(ra::Small<int, 4>(Isize), 0);
  174. auto II = I.data();
  175. auto AA = A.data();
  176. auto BB = B.data();
  177. Benchmark bm { N, 3 };
  178. auto report = [&](std::string const &tag, auto && bv)
  179. {
  180. tr.info(std::setw(5), std::fixed, bm.avg(bv)/B.size()/1e-9, " ns [", bm.stddev(bv)/B.size()/1e-9, "] ", tag)
  181. .test_eq(Istep*(1000000*ra::_0 + 10000*ra::_1 + 100*ra::_2 + 1*ra::_3), B);
  182. };
  183. report("3D indexing on raw pointers",
  184. bm.run([&]()
  185. {
  186. for (int i=0; i<Isize; ++i) {
  187. for (int j=0; j<Isize; ++j) {
  188. for (int k=0; k<Isize; ++k) {
  189. for (int l=0; l<Isize; ++l) {
  190. BB[l+Isize*(k+Isize*(j+Isize*i))] = AA[II[l]+Asize*(II[k]+Asize*(II[j]+Asize*II[i]))];
  191. }
  192. }
  193. }
  194. }
  195. }));
  196. report("vectorized selection",
  197. bm.run([&]()
  198. {
  199. B = A(I, I, I, I);
  200. }));
  201. report("slice one axis at a time", // TODO one way A(i, i, i, i) could work
  202. bm.run([&]()
  203. {
  204. for (int i=0; i<Isize; ++i) {
  205. for (int j=0; j<Isize; ++j) {
  206. for (int k=0; k<Isize; ++k) {
  207. B(i, j, k) = A(I[i], I[j], I[k])(I);
  208. }
  209. }
  210. }
  211. }));
  212. };
  213. tr.section("fixed rank");
  214. rank1_1111_test(ra::Unique<real, 4>(), 40, 20, 2, 200);
  215. rank1_1111_test(ra::Unique<real, 4>(), 10, 5, 2, 4*4*4*4*200);
  216. }
  217. return tr.summary();
  218. }