bench-dot.cc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. // -*- mode: c++; coding: utf-8 -*-
  2. // ra-ra/bench - dot() with various array types.
  3. // (c) Daniel Llorens - 2011-2023
  4. // This library is free software; you can redistribute it and/or modify it under
  5. // the terms of the GNU Lesser General Public License as published by the Free
  6. // Software Foundation; either version 3 of the License, or (at your option) any
  7. // later version.
  8. #include <iostream>
  9. #include <iomanip>
  10. #include <random>
  11. #include "ra/bench.hh"
  12. using std::cout, std::endl, std::setw, std::setprecision, ra::TestRecorder;
  13. using ra::Small, ra::Unique, ra::dim_t;
  14. using real = double;
  15. int const N = 100000;
  16. Small<dim_t, 1> S1 { 24*24 };
  17. Small<dim_t, 2> S2 { 24, 24 };
  18. Small<dim_t, 3> S3 { 8, 8, 9 };
  19. struct by_raw
  20. {
  21. constexpr static char const * name = "raw";
  22. template <class A, class B>
  23. real operator()(A const & a, B const & b)
  24. {
  25. constexpr int rank = ra::rank_s<A>();
  26. if constexpr ((std::is_pointer_v<A> && std::is_pointer_v<B>) || 1==rank) {
  27. real y(0.);
  28. for (int j=0; j<S1[0]; ++j) {
  29. y += a[j]*b[j];
  30. }
  31. return y;
  32. } else if constexpr (2==rank) {
  33. real y(0.);
  34. for (int j=0; j<S2[0]; ++j) {
  35. for (int k=0; k<S2[1]; ++k) {
  36. y += a(j, k)*b(j, k);
  37. }
  38. }
  39. return y;
  40. } else if constexpr (3==rank) {
  41. real y(0.);
  42. for (int j=0; j<S3[0]; ++j) {
  43. for (int k=0; k<S3[1]; ++k) {
  44. for (int l=0; l<S3[2]; ++l) {
  45. y += a(j, k, l)*b(j, k, l);
  46. }
  47. }
  48. }
  49. return y;
  50. } else {
  51. abort();
  52. }
  53. }
  54. };
  55. #define BY_PLY(NAME, INSIDE) \
  56. struct NAME { \
  57. constexpr static char const * name = STRINGIZE(NAME); \
  58. template <class A, class B> real operator()(A && a, B && b) \
  59. { \
  60. real y(0.); \
  61. INSIDE; \
  62. return y; \
  63. } \
  64. }; \
  65. #define BY_PLY_TAGGED(PLYER) \
  66. /* plain */ \
  67. BY_PLY(JOIN(by_1l_, PLYER), \
  68. ra::PLYER(ra::map([&y](real const a, real const b) { y += a*b; }, \
  69. a, b))) \
  70. \
  71. /* separate reduction */ \
  72. BY_PLY(JOIN(by_2l_, PLYER), \
  73. ra::PLYER(ra::map([&y](real const a) { y += a; }, \
  74. ra::map([](real const a, real const b) { return a*b; }, \
  75. a, b)))) \
  76. \
  77. /* using trivial rank conjunction. FIXME requires static rank */ \
  78. BY_PLY(JOIN(by_1w_, PLYER), \
  79. ra::PLYER(ra::map(ra::wrank<0, 0>([&y](real const a, real const b) { y += a*b; }), \
  80. a, b))) \
  81. \
  82. /* separate reduction: using trivial rank conjunction. FIXME requires static rank */ \
  83. BY_PLY(JOIN(by_2w_, PLYER), \
  84. ra::PLYER(ra::map(ra::wrank<0>([&y](real const a) { y += a; }), \
  85. ra::map(ra::wrank<0, 0>([](real const a, real const b) { return a*b; }), \
  86. a, b))));
  87. BY_PLY_TAGGED(ply_ravel);
  88. BY_PLY_TAGGED(ply_fixed);
  89. real a, b, ref, rspec;
  90. int main()
  91. {
  92. TestRecorder tr;
  93. tr.o.width(6);
  94. tr.o.precision(4);
  95. cout << "RA_DO_FMA is " << RA_DO_FMA << endl;
  96. std::random_device rand;
  97. a = rand();
  98. b = rand();
  99. ref = a*b*S1[0]*N;
  100. {
  101. auto bench = [&tr](char const * tag, auto s, auto && ref, int reps, auto && f)
  102. {
  103. rspec = 1e-2;
  104. constexpr int M = ra::size(s);
  105. decltype(s) A(a);
  106. decltype(s) B(b);
  107. real y(0.);
  108. auto bv = Benchmark().repeats(reps).runs(3).run([&]() { y += f(A, B); });
  109. tr.info(Benchmark::avg(bv)/M/1e-9, " ns [", std::setprecision(3), Benchmark::stddev(bv)/M/1e-9, "] ", tag)
  110. .test_rel(a*b*M*reps*3, y, rspec);
  111. };
  112. auto f_small_indexed_1 = [](auto && A, auto && B)
  113. {
  114. real y = 0;
  115. for (int j=0; j!=A.size(); ++j) {
  116. y += A(j)*B(j);
  117. }
  118. return y;
  119. };
  120. auto f_small_indexed_2 = [](auto && A, auto && B)
  121. {
  122. real y = 0;
  123. for (int i=0; i!=A.len(0); ++i) {
  124. for (int j=0; j!=A.len(1); ++j) {
  125. y += A(i, j)*B(i, j);
  126. }
  127. }
  128. return y;
  129. };
  130. auto f_small_indexed_3 = [](auto && A, auto && B)
  131. {
  132. real y = 0;
  133. for (int i=0; i!=A.len(0); ++i) {
  134. for (int j=0; j!=A.len(1); ++j) {
  135. for (int k=0; k!=A.len(2); ++k) {
  136. y += A(i, j, k)*B(i, j, k);
  137. }
  138. }
  139. }
  140. return y;
  141. };
  142. auto f_small_indexed_raw = [](auto && A, auto && B)
  143. {
  144. real * a = A.data();
  145. real * b = B.data();
  146. real y = 0;
  147. for (int j=0; j!=A.size(); ++j) {
  148. y += a[j]*b[j];
  149. }
  150. return y;
  151. };
  152. // optimize() plugs into the definition of operator*, etc. See ply_fixed [ra43].
  153. auto f_small_op = [](auto && A, auto && B)
  154. {
  155. return sum(A*B);
  156. };
  157. #define DEFINE_SMALL_PLY(name, plier) \
  158. auto JOIN(f_small_, plier) = [](auto && A, auto && B) \
  159. { \
  160. real y = 0; \
  161. plier(ra::map([&y](real a, real b) { y += a*b; }, A, B)); \
  162. return y; \
  163. }
  164. DEFINE_SMALL_PLY(ply_ravel, ply_ravel);
  165. DEFINE_SMALL_PLY(ply_fixed, ply_fixed);
  166. DEFINE_SMALL_PLY(ply, ply);
  167. auto bench_all = [&](auto s, auto && f_small_indexed)
  168. {
  169. constexpr int M = ra::size(s);
  170. tr.section("small <", ra::shape(s), ">");
  171. auto extra = [&]() { return int(double(std::rand())*100/RAND_MAX); };
  172. int reps = (1000*1000*100)/M;
  173. bench("indexed", s, ref, reps+extra(), f_small_indexed);
  174. bench("indexed_raw", s, ref, reps+extra(), f_small_indexed_raw);
  175. bench("op", s, ref, reps+extra(), f_small_op);
  176. bench("ply_ravel", s, ref, reps+extra(), f_small_ply_ravel);
  177. bench("ply_fixed", s, ref, reps+extra(), f_small_ply_fixed);
  178. bench("ply", s, ref, reps+extra(), f_small_ply);
  179. };
  180. bench_all(ra::Small<real, 2>(), f_small_indexed_1);
  181. bench_all(ra::Small<real, 3>(), f_small_indexed_1);
  182. bench_all(ra::Small<real, 4>(), f_small_indexed_1);
  183. bench_all(ra::Small<real, 8>(), f_small_indexed_1);
  184. bench_all(ra::Small<real, 2, 2>(), f_small_indexed_2);
  185. bench_all(ra::Small<real, 3, 3>(), f_small_indexed_2);
  186. bench_all(ra::Small<real, 4, 4>(), f_small_indexed_2);
  187. bench_all(ra::Small<real, 3, 3, 3>(), f_small_indexed_3);
  188. }
  189. rspec = 2e-11;
  190. auto bench = [&tr](auto && a, auto && b, auto && ref, real rspec, int reps, auto && f)
  191. {
  192. real x = 0.;
  193. auto bv = Benchmark().repeats(reps).runs(3).run([&]() { x += f(a, b); });
  194. tr.info(Benchmark::avg(bv)/1e-9, " ns [", std::setprecision(3), Benchmark::stddev(bv)/1e-9, "] ", f.name)
  195. .test_rel(ref*3, x, rspec);
  196. };
  197. #define BENCH(f) bench(A, B, ref, rspec, N, f {});
  198. tr.section("std::vector<>");
  199. {
  200. std::vector<real> A(S1[0], a);
  201. std::vector<real> B(S1[0], b);
  202. BENCH(by_raw);
  203. }
  204. tr.section("unchecked pointer");
  205. {
  206. std::unique_ptr<real []> Au { new real[S1[0]] };
  207. std::unique_ptr<real []> Bu { new real[S1[0]] };
  208. auto A = Au.get();
  209. auto B = Bu.get();
  210. std::fill(A, A+S1[0], a);
  211. std::fill(B, B+S1[0], b);
  212. BENCH(by_raw);
  213. }
  214. #define BENCH_ALL \
  215. FOR_EACH(BENCH, by_raw); \
  216. FOR_EACH(BENCH, by_1l_ply_ravel, by_2l_ply_ravel, by_1w_ply_ravel, by_2w_ply_ravel); \
  217. FOR_EACH(BENCH, by_1l_ply_fixed, by_2l_ply_fixed, by_1w_ply_fixed, by_2w_ply_fixed);
  218. tr.section("ra:: wrapped std::vector<>");
  219. {
  220. auto A = std::vector<real>(S1[0], a);
  221. auto B = std::vector<real>(S1[0], b);
  222. BENCH_ALL;
  223. }
  224. tr.section("Unique<1>");
  225. {
  226. ra::Unique<real, 1> A(S1, a);
  227. ra::Unique<real, 1> B(S1, b);
  228. BENCH_ALL;
  229. }
  230. tr.section("Unique<2>");
  231. {
  232. ra::Unique<real, 2> A(S2, a);
  233. ra::Unique<real, 2> B(S2, b);
  234. BENCH_ALL;
  235. }
  236. tr.section("Unique<3>");
  237. {
  238. ra::Unique<real, 3> A(S3, a);
  239. ra::Unique<real, 3> B(S3, b);
  240. BENCH_ALL;
  241. }
  242. return tr.summary();
  243. }