2 次代码提交 078130f06e ... d0ee61a4ab

作者 SHA1 备注 提交日期
  lloda d0ee61a4ab Omit rt match checks when only one arg has positive rank 8 月之前
  lloda fd57d9d326 Give up on RA_STATIC_UNROLL 8 月之前
共有 6 个文件被更改,包括 20 次插入和 49 次删除
  1. 2 4
      bench/SConstruct
  2. 3 6
      config/ra.py
  3. 3 3
      ra/expr.hh
  4. 6 32
      ra/ply.hh
  5. 1 4
      test/SConstruct
  6. 5 0
      test/checks.cc

+ 2 - 4
bench/SConstruct

@@ -68,11 +68,9 @@ tester = ra.to_test_ra(env, variant_dir)
                'bench-pack', 'bench-from',
                'bench-stencil1', 'bench-stencil2', 'bench-stencil3',
                'bench-optimize', 'bench-tensorindex',
-               'bench-iterator', 'bench-at'
+               'bench-iterator', 'bench-at',
+               'bench-dot'
            ]]
 
-tester('bench-dot', target='bench-dot-no-su', cppdefines={'RA_STATIC_UNROLL': '0'})
-tester('bench-dot', target='bench-dot-su', cppdefines={'RA_STATIC_UNROLL': '1'})
-
 if not top['skip_summary']:
     atexit.register(lambda: ra.print_summary(GetBuildFailures, 'ra/bench'))

+ 3 - 6
config/ra.py

@@ -130,12 +130,9 @@ def to_source_from_noweb(env, targets, source):
     return [env.Notangle(target, remove_ext(main) + '.nw') for target in targets]
 
 def to_test_ra(env_, variant_dir):
-    def f(source, target='', cxxflags=[], cppdefines=[]):
-        if len(cxxflags)==0 or len(cppdefines)==0:
-            env = env_
-        else:
-            env = env_.Clone()
-            env.Append(CXXFLAGS=cxxflags + ['-U' + k for k in cppdefines.keys()], CPPDEFINES=cppdefines)
+    def f(source, target='', cxxflags=[], cppdefines={}):
+        env = env_.Clone()
+        env.Append(CXXFLAGS=cxxflags + ['-U' + k for k in cppdefines.keys()], CPPDEFINES=cppdefines)
         if len(target)==0:
             target = source
         obj = env.Object(target, [source + '.cc'])

+ 3 - 3
ra/expr.hh

@@ -320,11 +320,11 @@ start(is_iterator auto && t) { return RA_FWD(t); }
 // --------------------
 
 constexpr rank_t
-choose_rank(rank_t ra, rank_t rb) { return BAD==rb ? ra : BAD==ra ? rb : ANY==ra ? ra : ANY==rb ? rb : std::max(ra, rb); }
+choose_rank(rank_t a, rank_t b) { return BAD==b ? a : BAD==a ? b : ANY==a ? a : ANY==b ? b : std::max(a, b); }
 
 // pick first if mismatch (see below). FIXME maybe return invalid.
 constexpr dim_t
-choose_len(dim_t sa, dim_t sb) { return BAD==sa ? sb : BAD==sb ? sa : ANY==sa ? sb : sa; }
+choose_len(dim_t a, dim_t b) { return BAD==a ? b : BAD==b ? a : ANY==a ? b : a; }
 
 template <bool checkp, class T, class K=mp::iota<mp::len<T>>> struct Match;
 template <bool checkp, IteratorConcept ... P, int ... I>
@@ -340,7 +340,7 @@ struct Match<checkp, std::tuple<P ...>, mp::int_list<I ...>>
         if constexpr (sizeof...(P)<2) {
             return 2;
         } else if constexpr (ANY==rs) {
-            return 1; // FIXME can be tightened to 2 if all args are rank 0 save one
+            return sizeof...(P)==1+(bool(0==ra::rank_s<P>()) + ...) ? 2 : 1;
         } else {
             bool tbc = false;
             for (int k=0; k<rs; ++k) {

+ 6 - 32
ra/ply.hh

@@ -220,11 +220,6 @@ subply(A & a, dim_t s, S const & ss0, Early & early)
     }
 }
 
-// possibly pessimize ply_fixed(). See bench-dot [ra43]
-#ifndef RA_STATIC_UNROLL
-#define RA_STATIC_UNROLL 0
-#endif
-
 template <IteratorConcept A, class Early = Nop>
 constexpr decltype(auto)
 ply_fixed(A && a, Early && early = Nop {})
@@ -241,35 +236,14 @@ ply_fixed(A && a, Early && early = Nop {})
             return;
         }
     } else {
-// static keep_step implies all else is static.
-        if constexpr (RA_STATIC_UNROLL && rank>1 && requires (dim_t st, rank_t z, rank_t j) { A::keep_step(st, z, j); }) {
-            constexpr auto ss0 = a.step(order[0]);
-// find outermost compact dim.
-            constexpr auto sj = [&order]
-            {
-                dim_t ss = A::len_s(order[0]);
-                int j = 1;
-                for (; j<rank && A::keep_step(ss, order[0], order[j]); ++j) {
-                    ss *= A::len_s(order[j]);
-                }
-                return std::make_tuple(ss, j);
-            } ();
-            if constexpr (requires {early.def;}) {
-                return (subply<order, rank-1, std::get<1>(sj)>(a, std::get<0>(sj), ss0, early)).value_or(early.def);
-            } else {
-                subply<order, rank-1, std::get<1>(sj)>(a, std::get<0>(sj), ss0, early);
-            }
-        } else {
-#pragma GCC diagnostic push // gcc 12.2 and 13.2 with RA_DO_CHECK=0 and -fno-sanitize=all
+#pragma GCC diagnostic push
 #pragma GCC diagnostic warning "-Warray-bounds"
-            auto ss0 = a.step(order[0]); // gcc 14.1 with RA_DO_CHECK=0 and sanitizer on
-// not worth unrolling.
-            if constexpr (requires {early.def;}) {
-                return (subply<order, rank-1, 1>(a, a.len(order[0]), ss0, early)).value_or(early.def);
-            } else {
-                subply<order, rank-1, 1>(a, a.len(order[0]), ss0, early);
-            }
+    auto ss0 = a.step(order[0]); // gcc 14.1 with RA_DO_CHECK=0 and sanitizer on
 #pragma GCC diagnostic pop
+        if constexpr (requires {early.def;}) {
+            return (subply<order, rank-1, 1>(a, a.len(order[0]), ss0, early)).value_or(early.def);
+        } else {
+            subply<order, rank-1, 1>(a, a.len(order[0]), ss0, early);
         }
     }
 }

+ 1 - 4
test/SConstruct

@@ -53,7 +53,7 @@ tester = ra.to_test_ra(env, variant_dir)
 
 [tester(test)
  for test in ['at', 'bench', 'big-0', 'big-1', 'bug83', 'cellrank', 'checks', 'compatibility',
-              'concrete', 'const', 'constexpr', 'dual', 'explode-0', 'foreign', 'frame-new',
+              'concrete', 'const', 'constexpr', 'dual', 'early', 'explode-0', 'foreign', 'frame-new',
               'frame-old', 'fromb', 'fromu', 'headers', 'io', 'iota', 'iterator-small', 'len',
               'list9', 'macros', 'mem-fn', 'ndebug', 'nested-0', 'old', 'operators', 'optimize',
               'owned', 'ownership', 'ply', 'ra-0', 'ra-1', 'ra-10', 'ra-11', 'ra-12', 'ra-13',
@@ -64,9 +64,6 @@ tester = ra.to_test_ra(env, variant_dir)
               'wrank'
               ]]
 
-tester('early', target='early-no-su', cppdefines={'RA_STATIC_UNROLL': '0'})
-tester('early', target='early-su', cppdefines={'RA_STATIC_UNROLL': '1'})
-
 tester('ra-10', target='ra-10a', cxxflags=['-O3'], cppdefines={'RA_DO_CHECK': '0'})
 tester('ra-10', target='ra-10b', cxxflags=['-O1'], cppdefines={'RA_DO_CHECK': '0'})
 tester('ra-10', target='ra-10c', cxxflags=['-O3'], cppdefines={'RA_DO_CHECK': '1'})

+ 5 - 0
test/checks.cc

@@ -120,6 +120,11 @@ int main()
 // see test/frame-new.cc
 // ------------------------------
 
+    tr.section("static match in dynamic case");
+    {
+        ra::Big<int> a({2, 3, 4}, 0);
+        tr.test_eq(2, agree_s(a, 99));
+    }
     tr.section("dynamic (implicit) match");
     {
         ra::Big<int, 3> a({2, 3, 4}, (ra::_0+1)*100 + (ra::_1+1)*10 + (ra::_2+1));