123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266 |
- // -*- mode: c++; coding: utf-8 -*-
- // ra-ra/bench - dot() with various array types.
- // (c) Daniel Llorens - 2011-2023
- // This library is free software; you can redistribute it and/or modify it under
- // the terms of the GNU Lesser General Public License as published by the Free
- // Software Foundation; either version 3 of the License, or (at your option) any
- // later version.
- #include <iostream>
- #include <iomanip>
- #include <random>
- #include "ra/bench.hh"
- using std::cout, std::endl, std::setw, std::setprecision, ra::TestRecorder;
- using ra::Small, ra::Unique, ra::dim_t;
- using real = double;
- int const N = 100000;
- Small<dim_t, 1> S1 { 24*24 };
- Small<dim_t, 2> S2 { 24, 24 };
- Small<dim_t, 3> S3 { 8, 8, 9 };
- struct by_raw
- {
- constexpr static char const * name = "raw";
- template <class A, class B>
- real operator()(A const & a, B const & b)
- {
- constexpr int rank = ra::rank_s<A>();
- if constexpr ((std::is_pointer_v<A> && std::is_pointer_v<B>) || 1==rank) {
- real y(0.);
- for (int j=0; j<S1[0]; ++j) {
- y += a[j]*b[j];
- }
- return y;
- } else if constexpr (2==rank) {
- real y(0.);
- for (int j=0; j<S2[0]; ++j) {
- for (int k=0; k<S2[1]; ++k) {
- y += a(j, k)*b(j, k);
- }
- }
- return y;
- } else if constexpr (3==rank) {
- real y(0.);
- for (int j=0; j<S3[0]; ++j) {
- for (int k=0; k<S3[1]; ++k) {
- for (int l=0; l<S3[2]; ++l) {
- y += a(j, k, l)*b(j, k, l);
- }
- }
- }
- return y;
- } else {
- abort();
- }
- }
- };
- #define BY_PLY(NAME, INSIDE) \
- struct NAME { \
- constexpr static char const * name = STRINGIZE(NAME); \
- template <class A, class B> real operator()(A && a, B && b) \
- { \
- real y(0.); \
- INSIDE; \
- return y; \
- } \
- }; \
- #define BY_PLY_TAGGED(PLYER) \
- /* plain */ \
- BY_PLY(JOIN(by_1l_, PLYER), \
- ra::PLYER(ra::map([&y](real const a, real const b) { y += a*b; }, \
- a, b))) \
- \
- /* separate reduction */ \
- BY_PLY(JOIN(by_2l_, PLYER), \
- ra::PLYER(ra::map([&y](real const a) { y += a; }, \
- ra::map([](real const a, real const b) { return a*b; }, \
- a, b)))) \
- \
- /* using trivial rank conjunction. FIXME requires static rank */ \
- BY_PLY(JOIN(by_1w_, PLYER), \
- ra::PLYER(ra::map(ra::wrank<0, 0>([&y](real const a, real const b) { y += a*b; }), \
- a, b))) \
- \
- /* separate reduction: using trivial rank conjunction. FIXME requires static rank */ \
- BY_PLY(JOIN(by_2w_, PLYER), \
- ra::PLYER(ra::map(ra::wrank<0>([&y](real const a) { y += a; }), \
- ra::map(ra::wrank<0, 0>([](real const a, real const b) { return a*b; }), \
- a, b))));
- BY_PLY_TAGGED(ply_ravel);
- BY_PLY_TAGGED(ply_fixed);
- real a, b, ref, rspec;
- int main()
- {
- TestRecorder tr;
- tr.o.width(6);
- tr.o.precision(4);
- cout << "RA_DO_FMA is " << RA_DO_FMA << endl;
- std::random_device rand;
- a = rand();
- b = rand();
- ref = a*b*S1[0]*N;
- {
- auto bench = [&tr](char const * tag, auto s, auto && ref, int reps, auto && f)
- {
- rspec = 1e-2;
- constexpr int M = ra::size(s);
- decltype(s) A(a);
- decltype(s) B(b);
- real y(0.);
- auto bv = Benchmark().repeats(reps).runs(3).run([&]() { y += f(A, B); });
- tr.info(Benchmark::avg(bv)/M/1e-9, " ns [", std::setprecision(3), Benchmark::stddev(bv)/M/1e-9, "] ", tag)
- .test_rel(a*b*M*reps*3, y, rspec);
- };
- auto f_small_indexed_1 = [](auto && A, auto && B)
- {
- real y = 0;
- for (int j=0; j!=A.size(); ++j) {
- y += A(j)*B(j);
- }
- return y;
- };
- auto f_small_indexed_2 = [](auto && A, auto && B)
- {
- real y = 0;
- for (int i=0; i!=A.len(0); ++i) {
- for (int j=0; j!=A.len(1); ++j) {
- y += A(i, j)*B(i, j);
- }
- }
- return y;
- };
- auto f_small_indexed_3 = [](auto && A, auto && B)
- {
- real y = 0;
- for (int i=0; i!=A.len(0); ++i) {
- for (int j=0; j!=A.len(1); ++j) {
- for (int k=0; k!=A.len(2); ++k) {
- y += A(i, j, k)*B(i, j, k);
- }
- }
- }
- return y;
- };
- auto f_small_indexed_raw = [](auto && A, auto && B)
- {
- real * a = A.data();
- real * b = B.data();
- real y = 0;
- for (int j=0; j!=A.size(); ++j) {
- y += a[j]*b[j];
- }
- return y;
- };
- // optimize() plugs into the definition of operator*, etc. See ply_fixed [ra43].
- auto f_small_op = [](auto && A, auto && B)
- {
- return sum(A*B);
- };
- #define DEFINE_SMALL_PLY(name, plier) \
- auto JOIN(f_small_, plier) = [](auto && A, auto && B) \
- { \
- real y = 0; \
- plier(ra::map([&y](real a, real b) { y += a*b; }, A, B)); \
- return y; \
- }
- DEFINE_SMALL_PLY(ply_ravel, ply_ravel);
- DEFINE_SMALL_PLY(ply_fixed, ply_fixed);
- DEFINE_SMALL_PLY(ply, ply);
- auto bench_all = [&](auto s, auto && f_small_indexed)
- {
- constexpr int M = ra::size(s);
- tr.section("small <", ra::shape(s), ">");
- auto extra = [&]() { return int(double(std::rand())*100/RAND_MAX); };
- int reps = (1000*1000*100)/M;
- bench("indexed", s, ref, reps+extra(), f_small_indexed);
- bench("indexed_raw", s, ref, reps+extra(), f_small_indexed_raw);
- bench("op", s, ref, reps+extra(), f_small_op);
- bench("ply_ravel", s, ref, reps+extra(), f_small_ply_ravel);
- bench("ply_fixed", s, ref, reps+extra(), f_small_ply_fixed);
- bench("ply", s, ref, reps+extra(), f_small_ply);
- };
- bench_all(ra::Small<real, 2>(), f_small_indexed_1);
- bench_all(ra::Small<real, 3>(), f_small_indexed_1);
- bench_all(ra::Small<real, 4>(), f_small_indexed_1);
- bench_all(ra::Small<real, 8>(), f_small_indexed_1);
- bench_all(ra::Small<real, 2, 2>(), f_small_indexed_2);
- bench_all(ra::Small<real, 3, 3>(), f_small_indexed_2);
- bench_all(ra::Small<real, 4, 4>(), f_small_indexed_2);
- bench_all(ra::Small<real, 3, 3, 3>(), f_small_indexed_3);
- }
- rspec = 2e-11;
- auto bench = [&tr](auto && a, auto && b, auto && ref, real rspec, int reps, auto && f)
- {
- real x = 0.;
- auto bv = Benchmark().repeats(reps).runs(3).run([&]() { x += f(a, b); });
- tr.info(Benchmark::avg(bv)/1e-9, " ns [", std::setprecision(3), Benchmark::stddev(bv)/1e-9, "] ", f.name)
- .test_rel(ref*3, x, rspec);
- };
- #define BENCH(f) bench(A, B, ref, rspec, N, f {});
- tr.section("std::vector<>");
- {
- std::vector<real> A(S1[0], a);
- std::vector<real> B(S1[0], b);
- BENCH(by_raw);
- }
- tr.section("unchecked pointer");
- {
- std::unique_ptr<real []> Au { new real[S1[0]] };
- std::unique_ptr<real []> Bu { new real[S1[0]] };
- auto A = Au.get();
- auto B = Bu.get();
- std::fill(A, A+S1[0], a);
- std::fill(B, B+S1[0], b);
- BENCH(by_raw);
- }
- #define BENCH_ALL \
- FOR_EACH(BENCH, by_raw); \
- FOR_EACH(BENCH, by_1l_ply_ravel, by_2l_ply_ravel, by_1w_ply_ravel, by_2w_ply_ravel); \
- FOR_EACH(BENCH, by_1l_ply_fixed, by_2l_ply_fixed, by_1w_ply_fixed, by_2w_ply_fixed);
- tr.section("ra:: wrapped std::vector<>");
- {
- auto A = std::vector<real>(S1[0], a);
- auto B = std::vector<real>(S1[0], b);
- BENCH_ALL;
- }
- tr.section("Unique<1>");
- {
- ra::Unique<real, 1> A(S1, a);
- ra::Unique<real, 1> B(S1, b);
- BENCH_ALL;
- }
- tr.section("Unique<2>");
- {
- ra::Unique<real, 2> A(S2, a);
- ra::Unique<real, 2> B(S2, b);
- BENCH_ALL;
- }
- tr.section("Unique<3>");
- {
- ra::Unique<real, 3> A(S3, a);
- ra::Unique<real, 3> B(S3, b);
- BENCH_ALL;
- }
- return tr.summary();
- }
|