bench-reduce-sqrm.cc 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. // -*- mode: c++; coding: utf-8 -*-
  2. // ra-ra/bench - reduce_sqrm() with various array types.
  3. // (c) Daniel Llorens - 2011, 2022
  4. // This library is free software; you can redistribute it and/or modify it under
  5. // the terms of the GNU Lesser General Public License as published by the Free
  6. // Software Foundation; either version 3 of the License, or (at your option) any
  7. // later version.
  8. #include <iostream>
  9. #include <iomanip>
  10. #include "ra/test.hh"
  11. using std::cout, std::endl, std::setw, std::setprecision, ra::TestRecorder, ra::Benchmark;
  12. using real = double;
  13. using real4 = ra::Small<real, 4>;
  14. using ra::sqrm;
  15. int const N = 500000;
  16. ra::Small<ra::dim_t, 1> S1 { 24*24 };
  17. ra::Small<ra::dim_t, 2> S2 { 24, 24 };
  18. ra::Small<ra::dim_t, 3> S3 { 8, 8, 9 };
  19. TestRecorder tr(std::cout);
  20. real y;
  21. template <class BV>
  22. void report(int size, BV const & bv)
  23. {
  24. tr.info(std::setw(5), std::fixed, Benchmark::avg(bv)/size/1e-9, " ns [", Benchmark::stddev(bv)/size/1e-9 ,"] ", bv.name)
  25. .test_eq(prod(S1)*N*4*4, y);
  26. }
  27. int main()
  28. {
  29. Benchmark bm = Benchmark().runs(3);
  30. report(4,
  31. bm.name("real4 raw").repeats(N*prod(S1)/4)
  32. .run_f([&](auto && repeat)
  33. {
  34. real4 A(7.), B(3.);
  35. y = 0.;
  36. repeat([&] {
  37. for (int j=0; j!=4; ++j) {
  38. y += sqrm(A(j)-B(j));
  39. }
  40. });
  41. }));
  42. report(4,
  43. bm.name("real4 expr").repeats(N*prod(S1)/4)
  44. .run_f([&](auto && repeat)
  45. {
  46. real4 A(7.), B(3.);
  47. y = 0.;
  48. repeat([&] {
  49. y += reduce_sqrm(A-B);
  50. });
  51. }));
  52. report(prod(S1),
  53. bm.name("C array raw").repeats(N)
  54. .run_f([&](auto && repeat)
  55. {
  56. ra::Unique<real, 1> A(S1, 7.);
  57. ra::Unique<real, 1> B(S1, 3.);
  58. y = 0.;
  59. repeat([&] {
  60. real const * a = A.data();
  61. real const * b = B.data();
  62. for (int j=0; j<S1[0]; ++j) {
  63. y += sqrm(a[j]-b[j]);
  64. }
  65. });
  66. }));
  67. // sqrm+reduction in one op.
  68. auto traversal = [&](auto && repeat, auto const & a, auto const & b)
  69. {
  70. y = 0.;
  71. repeat([&] {
  72. for_each([&](real const a, real const b) { y += sqrm(a, b); }, a, b);
  73. });
  74. };
  75. // separate reduction: compare abstraction penalty with by_traversal.
  76. auto traversal2 = [&](auto && repeat, auto const & a, auto const & b)
  77. {
  78. y = 0.;
  79. repeat([&] {
  80. for_each([&](real const a) { y += a; },
  81. map([](real const a, real const b) { return sqrm(a, b); },
  82. a, b));
  83. });
  84. };
  85. {
  86. ra::Unique<real, 1> A(S1, 7.);
  87. ra::Unique<real, 1> B(S1, 3.);
  88. report(prod(S1), bm.name("ra::Unique<1> ply nested 1").repeats(N).once_f(traversal, A, B));
  89. report(prod(S1), bm.name("ra::Unique<1> ply nested 2").repeats(N).once_f(traversal2, A, B));
  90. report(prod(S1), bm.name("ra::Unique<1> raw").repeats(N)
  91. .once_f([&](auto && repeat)
  92. {
  93. y = 0.;
  94. repeat([&] {
  95. for (int j=0; j<S1[0]; ++j) {
  96. y += sqrm(A(j)-B(j));
  97. }
  98. });
  99. }));
  100. }
  101. {
  102. ra::Unique<real, 2> A(S2, 7.);
  103. ra::Unique<real, 2> B(S2, 3.);
  104. report(prod(S2), bm.name("ra::Unique<2> ply nested 1").repeats(N).once_f(traversal, A, B));
  105. report(prod(S2), bm.name("ra::Unique<2> ply nested 2").repeats(N).once_f(traversal2, A, B));
  106. report(prod(S2), bm.name("ra::Unique<2> raw").repeats(N)
  107. .once_f([&](auto && repeat)
  108. {
  109. y = 0.;
  110. repeat([&] {
  111. for (int j=0; j<S2[0]; ++j) {
  112. for (int k=0; k<S2[1]; ++k) {
  113. y += sqrm(A(j, k)-B(j, k));
  114. }
  115. }
  116. });
  117. }));
  118. }
  119. {
  120. ra::Unique<real, 3> A(S3, 7.);
  121. ra::Unique<real, 3> B(S3, 3.);
  122. report(prod(S3), bm.name("ra::Unique<3> ply nested 1").repeats(N).once_f(traversal, A, B));
  123. report(prod(S3), bm.name("ra::Unique<3> ply nested 2").repeats(N).once_f(traversal2, A, B));
  124. report(prod(S3), bm.name("ra::Unique<3> raw").repeats(N)
  125. .once_f([&](auto && repeat)
  126. {
  127. y = 0.;
  128. repeat([&] {
  129. for (int j=0; j<S3[0]; ++j) {
  130. for (int k=0; k<S3[1]; ++k) {
  131. for (int l=0; l<S3[2]; ++l) {
  132. y += sqrm(A(j, k, l)-B(j, k, l));
  133. }
  134. }
  135. }
  136. });
  137. }));
  138. }
  139. return tr.summary();
  140. }