bench-dot.C 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. // -*- mode: c++; coding: utf-8 -*-
  2. /// @file bench-dot.H
  3. /// @brief Benchmark for dot with various array types.
  4. // (c) Daniel Llorens - 2011, 2014-2015, 2017
  5. // This library is free software; you can redistribute it and/or modify it under
  6. // the terms of the GNU Lesser General Public License as published by the Free
  7. // Software Foundation; either version 3 of the License, or (at your option) any
  8. // later version.
  9. #include <iostream>
  10. #include <iomanip>
  11. #include <random>
  12. #include "ra/test.H"
  13. #include "ra/complex.H"
  14. #include "ra/format.H"
  15. #include "ra/big.H"
  16. #include "ra/wrank.H"
  17. #include "ra/operators.H"
  18. #include "ra/io.H"
  19. #include "ra/bench.H"
  20. #include "test/old.H"
  21. using std::cout, std::endl, std::setw, std::setprecision;
  22. using ra::Small, ra::View, ra::Unique, ra::ra_traits, ra::dim_t;
  23. using real = double;
  24. int const N = 200000;
  25. Small<dim_t, 1> S1 { 24*24 };
  26. Small<dim_t, 2> S2 { 24, 24 };
  27. Small<dim_t, 3> S3 { 8, 8, 9 };
  28. struct by_raw
  29. {
  30. constexpr static char const * name = "raw";
  31. template <class A, class B>
  32. std::enable_if_t<ra_traits<A>::rank_s()==1, real>
  33. operator()(A const & a, B const & b)
  34. {
  35. real y(0.);
  36. for (int j=0; j<S1[0]; ++j) {
  37. y += a[j]*b[j];
  38. }
  39. return y;
  40. }
  41. template <class A, class B>
  42. std::enable_if_t<ra_traits<A>::rank_s()==2, real>
  43. operator()(A const & a, B const & b)
  44. {
  45. real y(0.);
  46. for (int j=0; j<S2[0]; ++j) {
  47. for (int k=0; k<S2[1]; ++k) {
  48. y += a(j, k)*b(j, k);
  49. }
  50. }
  51. return y;
  52. }
  53. template <class A, class B>
  54. std::enable_if_t<ra_traits<A>::rank_s()==3, real>
  55. operator()(A const & a, B const & b)
  56. {
  57. real y(0.);
  58. for (int j=0; j<S3[0]; ++j) {
  59. for (int k=0; k<S3[1]; ++k) {
  60. for (int l=0; l<S3[2]; ++l) {
  61. y += a(j, k, l)*b(j, k, l);
  62. }
  63. }
  64. }
  65. return y;
  66. }
  67. };
  68. #define BY_PLY(NAME, INSIDE) \
  69. struct NAME { \
  70. constexpr static char const * name = STRINGIZE(NAME); \
  71. template <class A, class B> real operator()(A && a, B && b) \
  72. { \
  73. real y(0.); \
  74. INSIDE; \
  75. return y; \
  76. } \
  77. }; \
  78. #define BY_PLY_TAGGED(PLYER) \
  79. /* plain */ \
  80. BY_PLY(JOIN(by_1l_, PLYER), \
  81. ra::PLYER(ra::map([&y](real const a, real const b) { y += a*b; }, \
  82. a, b))) \
  83. \
  84. /* separate reduction */ \
  85. BY_PLY(JOIN(by_2l_, PLYER), \
  86. ra::PLYER(ra::map([&y](real const a) { y += a; }, \
  87. ra::map([](real const a, real const b) { return a*b; }, \
  88. a, b)))) \
  89. \
  90. /* using trivial rank conjunction */ \
  91. BY_PLY(JOIN(by_1w_, PLYER), \
  92. ra::PLYER(ra::map(ra::wrank<0, 0>([&y](real const a, real const b) { y += a*b; }), \
  93. a, b))) \
  94. \
  95. /* separate reduction: using trivial rank conjunction */ \
  96. BY_PLY(JOIN(by_2w_, PLYER), \
  97. ra::PLYER(ra::map(ra::wrank<0>([&y](real const a) { y += a; }), \
  98. ra::map(ra::wrank<0, 0>([](real const a, real const b) { return a*b; }), \
  99. a, b))));
  100. BY_PLY_TAGGED(ply_ravel);
  101. BY_PLY_TAGGED(ply_index);
  102. BY_PLY_TAGGED(plyf_index);
  103. BY_PLY_TAGGED(plyf);
  104. real a, b, ref, rspec;
  105. int main()
  106. {
  107. std::random_device rand;
  108. a = rand();
  109. b = rand();
  110. ref = a*b*S1[0]*N;
  111. TestRecorder tr;
  112. {
  113. auto bench = [&tr](char const * tag, auto s, auto && ref, int reps, auto && f)
  114. {
  115. rspec = 1e-2;
  116. constexpr int M = ra::ra_traits<decltype(s)>::size(s);
  117. decltype(s) A(a);
  118. decltype(s) B(b);
  119. real y(0.);
  120. auto bv = Benchmark().repeats(reps).runs(3).run([&]() { y += f(A, B); });
  121. tr.info(std::setw(6), std::fixed, Benchmark::avg(bv)/M/1e-9, " ns [", Benchmark::stddev(bv)/M/1e-9, "] ", tag)
  122. .test_rel_error(a*b*M*reps*3, y, rspec);
  123. };
  124. auto f_small_indexed_1 = [](auto && A, auto && B)
  125. {
  126. real y = 0;
  127. for (int j=0; j!=A.size(); ++j) {
  128. y += A(j)*B(j);
  129. }
  130. return y;
  131. };
  132. auto f_small_indexed_2 = [](auto && A, auto && B)
  133. {
  134. real y = 0;
  135. for (int i=0; i!=A.size(0); ++i) {
  136. for (int j=0; j!=A.size(1); ++j) {
  137. y += A(i, j)*B(i, j);
  138. }
  139. }
  140. return y;
  141. };
  142. auto f_small_indexed_3 = [](auto && A, auto && B)
  143. {
  144. real y = 0;
  145. for (int i=0; i!=A.size(0); ++i) {
  146. for (int j=0; j!=A.size(1); ++j) {
  147. for (int k=0; k!=A.size(2); ++k) {
  148. y += A(i, j, k)*B(i, j, k);
  149. }
  150. }
  151. }
  152. return y;
  153. };
  154. auto f_small_indexed_raw = [](auto && A, auto && B)
  155. {
  156. real * a = A.data();
  157. real * b = B.data();
  158. real y = 0;
  159. for (int j=0; j!=A.size(); ++j) {
  160. y += a[j]*b[j];
  161. }
  162. return y;
  163. };
  164. // optimize() plugs into the definition of operator*, etc.
  165. auto f_small_op = [](auto && A, auto && B)
  166. {
  167. return sum(A*B); // TODO really slow with rank>2 or even rank>1 depending on the sizes
  168. };
  169. #define DEFINE_SMALL_PLY(name, plier) \
  170. auto JOIN(f_small_, plier) = [](auto && A, auto && B) \
  171. { \
  172. real y = 0; \
  173. plier(ra::map([&y](real a, real b) { y += a*b; }, A, B)); \
  174. return y; \
  175. }
  176. DEFINE_SMALL_PLY(ply_ravel, ply_ravel);
  177. DEFINE_SMALL_PLY(ply_index, ply_index);
  178. DEFINE_SMALL_PLY(plyf, plyf);
  179. DEFINE_SMALL_PLY(plyf_index, plyf_index);
  180. DEFINE_SMALL_PLY(ply, ply);
  181. auto bench_all = [&](auto s, auto && f_small_indexed)
  182. {
  183. constexpr int M = ra::ra_traits<decltype(s)>::size(s);
  184. tr.section("small <", ra::ra_traits<decltype(s)>::shape(s), ">");
  185. auto extra = [&]() { return int(double(std::rand())*100/RAND_MAX); };
  186. int reps = (1000*1000*100)/M;
  187. bench("indexed", s, ref, reps+extra(), f_small_indexed);
  188. bench("indexed_raw", s, ref, reps+extra(), f_small_indexed_raw);
  189. bench("op", s, ref, reps+extra(), f_small_op);
  190. bench("ply_ravel", s, ref, reps+extra(), f_small_ply_ravel);
  191. bench("ply_index", s, ref, reps+extra(), f_small_ply_index);
  192. bench("plyf", s, ref, reps+extra(), f_small_plyf);
  193. bench("plyf_index", s, ref, reps+extra(), f_small_plyf_index);
  194. bench("ply", s, ref, reps+extra(), f_small_ply);
  195. };
  196. bench_all(ra::Small<real, 2>(), f_small_indexed_1);
  197. bench_all(ra::Small<real, 3>(), f_small_indexed_1);
  198. bench_all(ra::Small<real, 4>(), f_small_indexed_1);
  199. bench_all(ra::Small<real, 8>(), f_small_indexed_1);
  200. bench_all(ra::Small<real, 2, 2>(), f_small_indexed_2);
  201. bench_all(ra::Small<real, 3, 3>(), f_small_indexed_2);
  202. bench_all(ra::Small<real, 4, 4>(), f_small_indexed_2);
  203. bench_all(ra::Small<real, 3, 3, 3>(), f_small_indexed_3);
  204. }
  205. rspec = 2e-11;
  206. auto bench = [&tr](auto && a, auto && b, auto && ref, real rspec, int reps, auto && f)
  207. {
  208. real x = 0.;
  209. auto bv = Benchmark().repeats(reps).runs(3).run([&]() { x += f(a, b); });
  210. tr.info(std::setw(6), std::fixed, Benchmark::avg(bv)/1e-9, " ns [", Benchmark::stddev(bv)/1e-9, "] ", f.name)
  211. .test_rel_error(ref*3, x, rspec);
  212. };
  213. #define BENCH(f) bench(A, B, ref, rspec, N, f {});
  214. tr.section("std::vector<>");
  215. {
  216. std::vector<real> A(S1[0], a);
  217. std::vector<real> B(S1[0], b);
  218. BENCH(by_raw);
  219. }
  220. tr.section("unchecked pointer");
  221. {
  222. std::unique_ptr<real []> Au { new real[S1[0]] };
  223. std::unique_ptr<real []> Bu { new real[S1[0]] };
  224. auto A = Au.get();
  225. auto B = Bu.get();
  226. std::fill(A, A+S1[0], a);
  227. std::fill(B, B+S1[0], b);
  228. BENCH(by_raw);
  229. }
  230. #define BENCH_ALL \
  231. FOR_EACH(BENCH, by_raw, by_1l_ply_ravel, by_2l_ply_ravel, by_1w_ply_ravel, by_2w_ply_ravel); \
  232. FOR_EACH(BENCH, by_1l_ply_index, by_2l_ply_index, by_1w_ply_index, by_2w_ply_index); \
  233. FOR_EACH(BENCH, by_1l_plyf, by_2l_plyf, by_1w_plyf, by_2w_plyf); \
  234. FOR_EACH(BENCH, by_1l_plyf_index, by_2l_plyf_index, by_1w_plyf_index, by_2w_plyf_index);
  235. tr.section("ra:: wrapped std::vector<>");
  236. {
  237. auto A = std::vector<real>(S1[0], a);
  238. auto B = std::vector<real>(S1[0], b);
  239. BENCH_ALL;
  240. }
  241. tr.section("raw<1>");
  242. {
  243. ra::Unique<real, 1> A(S1, a);
  244. ra::Unique<real, 1> B(S1, b);
  245. BENCH_ALL;
  246. }
  247. tr.section("raw<2>");
  248. {
  249. ra::Unique<real, 2> A(S2, a);
  250. ra::Unique<real, 2> B(S2, b);
  251. BENCH_ALL;
  252. }
  253. tr.section("raw<3>");
  254. {
  255. ra::Unique<real, 3> A(S3, a);
  256. ra::Unique<real, 3> B(S3, b);
  257. std::vector<real> x(17, ra::ALINF);
  258. BENCH_ALL;
  259. }
  260. return tr.summary();
  261. }