123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- #include <iostream>
- #include <iomanip>
- #include "ra/operators.H"
- #include "ra/io.H"
- #include "ra/test.H"
- #include "ra/bench.H"
- using std::cout, std::endl, std::flush;
- using real = double;
- int main()
- {
- TestRecorder tr(cout);
- cout.precision(4);
- auto bench =
- [&tr](char const * tag, int m, int n, int reps, auto && f)
- {
- ra::Big<real, 2> a({m, n}, ra::_0 - ra::_1);
- ra::Big<real, 1> ref({n}, 0);
- iter<1>(ref) += iter<1>(a)*reps;
- ra::Big<real, 1> c({n}, ra::none);
- auto bv = Benchmark().repeats(reps).runs(3)
- .once_f([&](auto && repeat) { c=0.; repeat([&]() { f(c, a); }); });
- tr.info(std::setw(5), std::fixed, Benchmark::avg(bv)/(m*n)/1e-9, " ns [",
- Benchmark::stddev(bv)/(m*n)/1e-9 ,"] ", tag).test_eq(ref, c);
- };
- auto bench_all =
- [&](int m, int n, int reps)
- {
- tr.section(m, " x ", n, " times ", reps);
- bench("raw", m, n, reps,
- [](auto & c, auto const & a)
- {
- real * __restrict__ ap = a.data();
- real * __restrict__ cp = c.data();
- ra::dim_t const m = a.size(0);
- ra::dim_t const n = a.size(1);
- for (ra::dim_t i=0; i!=m; ++i) {
- for (ra::dim_t j=0; j!=n; ++j) {
- cp[j] += ap[i*n+j];
- }
- }
- });
- bench("sideways", m, n, reps,
- [](auto & c, auto const & a)
- {
- for (int j=0, jend=a.size(1); j<jend; ++j) {
- c(j) += sum(a(ra::all, j));
- }
- });
- bench("accumrows", m, n, reps,
- [](auto & c, auto const & a)
- {
- for_each([&c](auto && a) { c += a; }, iter<1>(a));
- });
- bench("wrank1", m, n, reps,
- [](auto & c, auto const & a)
- {
- for_each(ra::wrank<1, 1>([](auto & c, auto && a) { c += a; }), c, a);
- });
- bench("wrank2", m, n, reps,
- [](auto & c, auto const & a)
- {
- for_each(ra::wrank<1, 1>(ra::wrank<0, 0>([](auto & c, auto a) { c += a; })), c, a);
- });
- bench("accumscalar", m, n, reps,
- [](auto & c, auto const & a)
- {
- ra::scalar(c) += iter<1>(a);
- });
- bench("accumiter", m, n, reps,
- [](auto & c, auto const & a)
- {
- iter<1>(c) += iter<1>(a);
- });
- bench("frametransp", m, n, reps,
- [](auto & c, auto const & a)
- {
- c += transpose<1, 0>(a);
- });
- };
- bench_all(1, 1000000, 20);
- bench_all(10, 100000, 20);
- bench_all(100, 10000, 20);
- bench_all(1000, 1000, 20);
- bench_all(10000, 100, 20);
- bench_all(100000, 10, 20);
- bench_all(1000000, 1, 20);
- bench_all(1, 10000, 2000);
- bench_all(10, 1000, 2000);
- bench_all(100, 100, 2000);
- bench_all(1000, 10, 2000);
- bench_all(10000, 1, 2000);
- return tr.summary();
- }
|