bench-dot.cc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. // -*- mode: c++; coding: utf-8 -*-
  2. // ra-ra/bench - dot() with various array types.
  3. // (c) Daniel Llorens - 2011-2023
  4. // This library is free software; you can redistribute it and/or modify it under
  5. // the terms of the GNU Lesser General Public License as published by the Free
  6. // Software Foundation; either version 3 of the License, or (at your option) any
  7. // later version.
  8. #include <iostream>
  9. #include <iomanip>
  10. #include <random>
  11. #include "ra/bench.hh"
  12. using std::cout, std::endl, std::setw, std::setprecision, ra::TestRecorder;
  13. using ra::Small, ra::View, ra::Unique, ra::dim_t;
  14. using real = double;
  15. int const N = 100000;
  16. Small<dim_t, 1> S1 { 24*24 };
  17. Small<dim_t, 2> S2 { 24, 24 };
  18. Small<dim_t, 3> S3 { 8, 8, 9 };
  19. struct by_raw
  20. {
  21. constexpr static char const * name = "raw";
  22. template <class A, class B>
  23. real operator()(A const & a, B const & b)
  24. {
  25. constexpr int rank = ra::rank_s<A>();
  26. if constexpr ((std::is_pointer_v<A> && std::is_pointer_v<B>) || 1==rank) {
  27. real y(0.);
  28. for (int j=0; j<S1[0]; ++j) {
  29. y += a[j]*b[j];
  30. }
  31. return y;
  32. } else if constexpr (2==rank) {
  33. real y(0.);
  34. for (int j=0; j<S2[0]; ++j) {
  35. for (int k=0; k<S2[1]; ++k) {
  36. y += a(j, k)*b(j, k);
  37. }
  38. }
  39. return y;
  40. } else if constexpr (3==rank) {
  41. real y(0.);
  42. for (int j=0; j<S3[0]; ++j) {
  43. for (int k=0; k<S3[1]; ++k) {
  44. for (int l=0; l<S3[2]; ++l) {
  45. y += a(j, k, l)*b(j, k, l);
  46. }
  47. }
  48. }
  49. return y;
  50. } else {
  51. abort();
  52. }
  53. }
  54. };
  55. #define BY_PLY(NAME, INSIDE) \
  56. struct NAME { \
  57. constexpr static char const * name = STRINGIZE(NAME); \
  58. template <class A, class B> real operator()(A && a, B && b) \
  59. { \
  60. real y(0.); \
  61. INSIDE; \
  62. return y; \
  63. } \
  64. }; \
  65. #define BY_PLY_TAGGED(PLYER) \
  66. /* plain */ \
  67. BY_PLY(JOIN(by_1l_, PLYER), \
  68. ra::PLYER(ra::map([&y](real const a, real const b) { y += a*b; }, \
  69. a, b))) \
  70. \
  71. /* separate reduction */ \
  72. BY_PLY(JOIN(by_2l_, PLYER), \
  73. ra::PLYER(ra::map([&y](real const a) { y += a; }, \
  74. ra::map([](real const a, real const b) { return a*b; }, \
  75. a, b)))) \
  76. \
  77. /* using trivial rank conjunction. FIXME requires static rank */ \
  78. BY_PLY(JOIN(by_1w_, PLYER), \
  79. ra::PLYER(ra::map(ra::wrank<0, 0>([&y](real const a, real const b) { y += a*b; }), \
  80. a, b))) \
  81. \
  82. /* separate reduction: using trivial rank conjunction. FIXME requires static rank */ \
  83. BY_PLY(JOIN(by_2w_, PLYER), \
  84. ra::PLYER(ra::map(ra::wrank<0>([&y](real const a) { y += a; }), \
  85. ra::map(ra::wrank<0, 0>([](real const a, real const b) { return a*b; }), \
  86. a, b))));
  87. BY_PLY_TAGGED(ply_ravel);
  88. BY_PLY_TAGGED(ply_fixed);
  89. real a, b, ref, rspec;
  90. int main()
  91. {
  92. std::random_device rand;
  93. a = rand();
  94. b = rand();
  95. ref = a*b*S1[0]*N;
  96. TestRecorder tr;
  97. tr.o.width(6);
  98. tr.o.precision(4);
  99. {
  100. auto bench = [&tr](char const * tag, auto s, auto && ref, int reps, auto && f)
  101. {
  102. rspec = 1e-2;
  103. constexpr int M = ra::size(s);
  104. decltype(s) A(a);
  105. decltype(s) B(b);
  106. real y(0.);
  107. auto bv = Benchmark().repeats(reps).runs(3).run([&]() { y += f(A, B); });
  108. tr.info(Benchmark::avg(bv)/M/1e-9, " ns [", std::setprecision(3), Benchmark::stddev(bv)/M/1e-9, "] ", tag)
  109. .test_rel(a*b*M*reps*3, y, rspec);
  110. };
  111. auto f_small_indexed_1 = [](auto && A, auto && B)
  112. {
  113. real y = 0;
  114. for (int j=0; j!=A.size(); ++j) {
  115. y += A(j)*B(j);
  116. }
  117. return y;
  118. };
  119. auto f_small_indexed_2 = [](auto && A, auto && B)
  120. {
  121. real y = 0;
  122. for (int i=0; i!=A.len(0); ++i) {
  123. for (int j=0; j!=A.len(1); ++j) {
  124. y += A(i, j)*B(i, j);
  125. }
  126. }
  127. return y;
  128. };
  129. auto f_small_indexed_3 = [](auto && A, auto && B)
  130. {
  131. real y = 0;
  132. for (int i=0; i!=A.len(0); ++i) {
  133. for (int j=0; j!=A.len(1); ++j) {
  134. for (int k=0; k!=A.len(2); ++k) {
  135. y += A(i, j, k)*B(i, j, k);
  136. }
  137. }
  138. }
  139. return y;
  140. };
  141. auto f_small_indexed_raw = [](auto && A, auto && B)
  142. {
  143. real * a = A.data();
  144. real * b = B.data();
  145. real y = 0;
  146. for (int j=0; j!=A.size(); ++j) {
  147. y += a[j]*b[j];
  148. }
  149. return y;
  150. };
  151. // optimize() plugs into the definition of operator*, etc. See ply_fixed [ra43].
  152. auto f_small_op = [](auto && A, auto && B)
  153. {
  154. return sum(A*B);
  155. };
  156. #define DEFINE_SMALL_PLY(name, plier) \
  157. auto JOIN(f_small_, plier) = [](auto && A, auto && B) \
  158. { \
  159. real y = 0; \
  160. plier(ra::map([&y](real a, real b) { y += a*b; }, A, B)); \
  161. return y; \
  162. }
  163. DEFINE_SMALL_PLY(ply_ravel, ply_ravel);
  164. DEFINE_SMALL_PLY(ply_fixed, ply_fixed);
  165. DEFINE_SMALL_PLY(ply, ply);
  166. auto bench_all = [&](auto s, auto && f_small_indexed)
  167. {
  168. constexpr int M = ra::size(s);
  169. tr.section("small <", ra::shape(s), ">");
  170. auto extra = [&]() { return int(double(std::rand())*100/RAND_MAX); };
  171. int reps = (1000*1000*100)/M;
  172. bench("indexed", s, ref, reps+extra(), f_small_indexed);
  173. bench("indexed_raw", s, ref, reps+extra(), f_small_indexed_raw);
  174. bench("op", s, ref, reps+extra(), f_small_op);
  175. bench("ply_ravel", s, ref, reps+extra(), f_small_ply_ravel);
  176. bench("ply_fixed", s, ref, reps+extra(), f_small_ply_fixed);
  177. bench("ply", s, ref, reps+extra(), f_small_ply);
  178. };
  179. bench_all(ra::Small<real, 2>(), f_small_indexed_1);
  180. bench_all(ra::Small<real, 3>(), f_small_indexed_1);
  181. bench_all(ra::Small<real, 4>(), f_small_indexed_1);
  182. bench_all(ra::Small<real, 8>(), f_small_indexed_1);
  183. bench_all(ra::Small<real, 2, 2>(), f_small_indexed_2);
  184. bench_all(ra::Small<real, 3, 3>(), f_small_indexed_2);
  185. bench_all(ra::Small<real, 4, 4>(), f_small_indexed_2);
  186. bench_all(ra::Small<real, 3, 3, 3>(), f_small_indexed_3);
  187. }
  188. rspec = 2e-11;
  189. auto bench = [&tr](auto && a, auto && b, auto && ref, real rspec, int reps, auto && f)
  190. {
  191. real x = 0.;
  192. auto bv = Benchmark().repeats(reps).runs(3).run([&]() { x += f(a, b); });
  193. tr.info(Benchmark::avg(bv)/1e-9, " ns [", std::setprecision(3), Benchmark::stddev(bv)/1e-9, "] ", f.name)
  194. .test_rel(ref*3, x, rspec);
  195. };
  196. #define BENCH(f) bench(A, B, ref, rspec, N, f {});
  197. tr.section("std::vector<>");
  198. {
  199. std::vector<real> A(S1[0], a);
  200. std::vector<real> B(S1[0], b);
  201. BENCH(by_raw);
  202. }
  203. tr.section("unchecked pointer");
  204. {
  205. std::unique_ptr<real []> Au { new real[S1[0]] };
  206. std::unique_ptr<real []> Bu { new real[S1[0]] };
  207. auto A = Au.get();
  208. auto B = Bu.get();
  209. std::fill(A, A+S1[0], a);
  210. std::fill(B, B+S1[0], b);
  211. BENCH(by_raw);
  212. }
  213. #define BENCH_ALL \
  214. FOR_EACH(BENCH, by_raw); \
  215. FOR_EACH(BENCH, by_1l_ply_ravel, by_2l_ply_ravel, by_1w_ply_ravel, by_2w_ply_ravel); \
  216. FOR_EACH(BENCH, by_1l_ply_fixed, by_2l_ply_fixed, by_1w_ply_fixed, by_2w_ply_fixed);
  217. tr.section("ra:: wrapped std::vector<>");
  218. {
  219. auto A = std::vector<real>(S1[0], a);
  220. auto B = std::vector<real>(S1[0], b);
  221. BENCH_ALL;
  222. }
  223. tr.section("Unique<1>");
  224. {
  225. ra::Unique<real, 1> A(S1, a);
  226. ra::Unique<real, 1> B(S1, b);
  227. BENCH_ALL;
  228. }
  229. tr.section("Unique<2>");
  230. {
  231. ra::Unique<real, 2> A(S2, a);
  232. ra::Unique<real, 2> B(S2, b);
  233. BENCH_ALL;
  234. }
  235. tr.section("Unique<3>");
  236. {
  237. ra::Unique<real, 3> A(S3, a);
  238. ra::Unique<real, 3> B(S3, b);
  239. BENCH_ALL;
  240. }
  241. return tr.summary();
  242. }