123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540 |
- #ifndef _PAIRWISE_SUM_h_
- #define _PAIRWISE_SUM_h_
- /* PAIRWISE_SUM.h
- *
- * Copyright (C) 2017,2018 Paul Boersma <paul.boersma@uva.nl>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * (This is the two-clause BSD license, which is approximately identical to CC-BY.)
- */
- /*
- # PAIRWISE SUMMATION FOR C AND C++
- The pairwise summation algorithm as implemented here is approximately 1.5 times
- faster than using the usual naive sequential summation loop,
- and is also hundreds of times more precise than sequential summation.
- Using pairwise summation instead of sequential summation will improve all your
- code for sums, means, inner products, norms, matrix multiplications,
- and CPU-based neural networks.
-
- This is not necessarily a small thing in terms of CO2.
- The macro you will call is
- PAIRWISE_SUM (
- AccumulatorType, // the type of the accumulator: long double, double, float;
- sumVariableName, // the variable name of the accumulator, e.g. "sum";
- CounterType, // the type of the term counters: long, int, int64_t, intptr_t;
- sizeExpression, // the total number of terms to sum;
- initializeStatement, // the statement that declares and initializes the loop pointer(s),
- // which is/are typically a pointer or pointers to the data;
- termExpression // the expression that computes a "term" (thing to sum)
- // from a piece or pieces of data.
- incrementStatement, // the statement that increments the loop pointer(s)
- // before the next piece(s) of data is/are fetched;
- )
- We explain this in detail below, pedagogically starting with an analogous macro
- for the more usual "sequential" summation algorithm.
- ## 1. SEQUENTIAL SUMMATION: A SINGLE ACCUMULATOR
- The simplest use of summation of multiple values is if you have a single array of
- floating-point data to sum. If this array is x [1..size],
- then the usual straightforward algorithm for adding the terms is
- long double sum = 0.0;
- for (long i = 1; i <= size; i ++)
- sum += x [i];
- printf ("%.17g", (double) sum);
-
- This is *sequential summation*: a single *accumulator*, namely the variable `sum`,
- accumulates every element sequentially. Note that even though the array `x` will
- typically be of type `double`, the accumulator is of type `long double` in this example:
- using 80 bits for summing is much better than using 64 bits (1000 times more precise),
- and on modern computers it is usually equally fast.
- The formulation above, which uses array indexing, is equivalent to a formulation
- in terms of a "looping pointer":
- long double sum = 0.0;
- const double *xx = & x [1]; // the looping pointer
- for (long i = 1; i <= size; i ++) {
- sum += *xx;
- xx += 1;
- }
- printf ("%.17g", (double) sum);
- The looping-pointer formulation consists of defining (declaring and initializing)
- the pointer before the loop, and then inside the loop
- first dereferencing the pointer and then incrementing it.
- In this little algorithm we can discern the following seven ingredients:
- - AccumulatorType: long double
- - sumVariableName: "sum"
- - CounterType: long
- - sizeExpression: "size"
- - initializeStatement: "const double *xx = x"
- - termExpression: "*xx"
- - incrementStatement: "xx += 1"
- The algorithm can therefore be replaced with
-
- SEQUENTIAL_SUM (long double, sum, long, size, const double *xx = & x [1], *xx, xx += 1)
- printf ("%.17g", (double) sum);
- where the SEQUENTIAL_SUM macro is defined as:
- */
- #define SEQUENTIAL_SUM(AccumulatorType, sumVariableName, CounterType, sizeExpression, \
- initializeStatement, termExpression, incrementStatement) \
- \
- AccumulatorType sumVariableName = 0.0; \
- {/* scope */ \
- initializeStatement; \
- CounterType _n = sizeExpression; /* to ensure that the size expression is evaluated only once */ \
- for (CounterType _i = 1; _i <= _n; _i ++) { \
- sumVariableName += termExpression; \
- incrementStatement; \
- } \
- }
- /*
- ## 2. SEQUENTIAL SUMMATION IN MACHINE CODE
- How would you sum the four numbers a, b, c and d,
- if all that your computer can do is adding two numbers at a time?
-
- You could write the sum in one stroke in C, without parentheses:
- sum = a + b + c + d;
- Evaluation of pluses in C proceeds from left to right, so this formulation means the same as
- sum = ((a + b) + c) + d;
- In machine-like C, using the "registers" r1, r2, r3 and r4, you could implement this as
- r1 = a;
- r2 = b;
- r1 += r2;
- r3 = c;
- r1 += r3;
- r4 = d;
- r1 += r4;
- sum = r1;
- All these three formulations lead to identical true machine code,
- at least with Clang or GCC, and with optimization option -O3.
- ## 3. SPEED OF SEQUENTIAL SUMMATION
-
- If your processor has good prefetching, then the four loads from RAM (moving a, b, c,
- and d into the registers r1, r2, r3 and r4) will take up no time at all.
- The three additions, however, will require three clock cycles in total:
- as all additions are performed on r1, each addition will have to wait
- until the previous addition has finished. Adding up 64 numbers will require 63 cycles.
- ## 4. PRECISION OF SEQUENTIAL SUMMATION
-
- The worst case floating-point rounding error for sequential summation is N * epsilon,
- where N is the number of numbers to add, and epsilon is the precision of one number.
- The expected random-walk rounding error is epsilon * sqrt (N).
- ## 5. PAIRWISE SUMMATION: MULTIPLE ACCUMULATORS
- An alternative way to add the four numbers, next to sequential summation,
- is to proceed in a *pairwise* manner, dividing the four numbers into two groups,
- then summing these separately, then summing the two results. In short C this would be:
- sum = (a + b) + (c + d);
- This pairwise summation continues recursively. For instance, you add 8 numbers this way:
- sum = ((a + b) + (c + d)) + ((e + f) + (g + h));
- ## 6. PAIRWISE SUMMATION IN MACHINE CODE
- In machine-like C, using the "registers" r1, r2, r3 and r4, you can implement this as
- r1 = a;
- r2 = b;
- r1 += r2;
- r3 = c;
- r4 = d;
- r3 += r4;
- r1 += r3;
- sum = r1;
- The number of loads, stores and additions is identical to those in the sequential case.
-
- ## 7. SPEED OF PAIRWISE SUMMATION
-
- Pairwise summation can be faster than sequential summation,
- because the addition of r2 to r1 can be performed in parallel to the addition of r4 to r3.
- The three additions in section 5 could therefore be performed in two clock cycles.
- Indeed, both compilers that I am using in 2017 (Clang on the Mac, and GCC on Windows and Linux)
- take only 0.30 nanoseconds per addition, which is just over two-thirds of the clock period
- of the 2.3 GHz processor (which is 0.435 nanoseconds) of my 2014 Macbook Pro.
- On a processor far, far away, which has more than 64 registers, perfect prefetching,
- and perfectly parallel operations on independent registers, 64 terms may be added
- within 6 clock cycles: six is the number of times we need to add something to r1,
- and each of these additions has to wait for the result of the previous addition.
- Theoretically, then, but not yet in practice, the execution time has fallen from order N
- to order log(N), at least for low N (and for high N it will be of order N,
- but with a much lower multiplication factor than sequential summation has).
- ## 8. PRECISION OF PAIRWISE SUMMATION
-
- The worst-case rounding error is only epsilon * log (N),
- and the expected random-walk rounding error is only epsilon * sqrt (log (N)).
- Both are much better than in the sequential summation case.
- ## 9. HOW TO USE PAIRWISE SUMMATION
- For pairwise summation we use the exact same macro arguments as for sequential summation:
-
- PAIRWISE_SUM (long double, sum, long, size, const double *xx = & x [1], *xx, xx += 1)
- printf ("%.17g", (double) sum);
- The macro starts by declaring a variable of type `AccumulatorType` and name `sumVariableName`.
- Thus, the expansion of the macro effectively starts with the declaration `long double sum;`
- and the variable `sum` will be available after the macro ends,
- so that you can use it in your subsequent code, as is done here by using `(double) sum`.
- This variable is used to accumulate the resulting sum and should for best results
- therefore be capable of accumulating partial sums with little rounding error.
- For this reason, you will usually want AccumulatorType to be `long double`.
- If long doubles are not available on your platform, or if they are slow on your platform,
- you can use `double` instead, or even `float`.
- The size of the array to sum comes in in the argument `sizeExpression`, and in the expansion
- of the macro this number and all macro-internal counters are of type `CounterType`.
- In the example above I used `long` for simplicity, but you should note that on 64-bit Windows
- a `long` is only 32 bits, whereas the natural type for array indexing on a 64-bit machine
- would be `int64_t`; I therefore tend to use a CounterType of `intptr_t` myself,
- because this is 32 bits on 32-bit machines and 64 bits on 64-bit machines.
- If your `size` is an `int` (always 32 bits nowadays), you would use a CounterType of `int`;
- it is not beneficial to use a wider CounterType than the width of your `size`.
- The fifth argument of the macro declares and initializes the loop pointer,
- as in `const double *xx = & x [1]` above. This pointer starts out pointing at the first
- element of the array.
- The sixth argument of the macro is an expression that should evaluate to the value of a term,
- given the current value of the loop pointer(s), as in `*xx` above.
- The seventh argument is the formula you use for incrementing the loop pointer(s),
- as in `xx += 1` above. The macro uses this formula to prepare for the retrieval of
- the next term in the summation.
-
- More complicated use cases than a single array also exist. For instance, if you have to compute
- the inner product of the two arrays x [1..n] and y [1..n], you can do
-
- PAIRWISE_SUM (long double, inner, long, n,
- const double *xx = & x [1]; // the semicolon ensures that this line and the next form a single argument
- const double *yy = & y [1],
- (long double) *xx * (long double) *yy,
- (xx += 1, yy += 1)
- )
- printf ("%.17g", (double) inner);
- Note for the sixth argument: you can see here that you can do the two increments simultaneously
- by using parentheses and a comma; fortunately, the C macro preprocessor understands enough
- about parentheses to see that you mean the sixth argument to be a single argument.
- Note for the seventh argument: as 64-bit multiplication loses a lot of the precision of its
- two 64-bit operands, it is advisable to convert both operands to `long double`
- *before* the multiplication, as is done here. This usually costs no extra computation
- time (it can actually be faster). If you do
-
- PAIRWISE_SUM (long double, inner, long, n,
- const double *xx = & x [1]; const double *yy = & y [1], *xx * *yy, (++ xx, ++ yy))
- printf ("%.17g", (double) inner);
- instead, the conversion to `long double` is done (by the macro) *after* the multiplication,
- which is less precise.
- Other use cases include array multiplication with strides...
- PAIRWISE_SUM (long double, inner, long, n,
- const double *xx = & x [1]; // note the funny semicolon again
- const double *yy = & y [1],
- (long double) *xx * (long double) *yy,
- (xx += xstride, yy += ystride)
- )
- printf ("%.17g", (double) inner);
- ... and small-lag convolution...
- for (long i = 1; i <= xSize - kernelSize + 1; i ++) {
- PAIRWISE_SUM (long double, conv, long, kernelSize,
- const double *xx = & x [i];
- const double *filter = & kernel [kernelSize],
- (long double) *xx * (long double) *filter,
- (xx += 1, filter -= 1)
- )
- result [i] = conv;
- }
- ... and matrix multiplication, and computing norms.
- ## 10. IMPLEMENTATION OF PAIRWISE SUMMATION: LOW POWERS OF 2
-
- We have to implement pairwise summation with a macro,
- because the sixth and seventh arguments to PAIRWISE_SUM() have to be formulas for getting
- the next element to add. We use fixed formulas for adding 2, 4, 8, 16 or 32 terms,
- and a stack for the next 57 higher powers of 2, up to 2^62,
- so that our summation will work for N up to 2^63 - 1.
- The fixed formulas for the low powers of 2 are recursively defined macros:
- */
- #define PAIRWISE_SUM_1_TERM(AccumulatorType, accumulator, termExpression, incrementStatement) \
- AccumulatorType accumulator = termExpression; \
- incrementStatement;
- #define PAIRWISE_SUM_2_TERMS(AccumulatorType, accumulator, termExpression, incrementStatement) \
- PAIRWISE_SUM_1_TERM (AccumulatorType, accumulator, termExpression, incrementStatement) \
- { \
- PAIRWISE_SUM_1_TERM (AccumulatorType, _r2, termExpression, incrementStatement) \
- accumulator += _r2; \
- }
- #define PAIRWISE_SUM_4_TERMS(AccumulatorType, accumulator, termExpression, incrementStatement) \
- PAIRWISE_SUM_2_TERMS (AccumulatorType, accumulator, termExpression, incrementStatement) \
- { \
- PAIRWISE_SUM_2_TERMS (AccumulatorType, _r3, termExpression, incrementStatement) \
- accumulator += _r3; \
- }
- #define PAIRWISE_SUM_8_TERMS(AccumulatorType, accumulator, termExpression, incrementStatement) \
- PAIRWISE_SUM_4_TERMS (AccumulatorType, accumulator, termExpression, incrementStatement) \
- { \
- PAIRWISE_SUM_4_TERMS (AccumulatorType, _r4, termExpression, incrementStatement) \
- accumulator += _r4; \
- }
- #define PAIRWISE_SUM_16_TERMS(AccumulatorType, accumulator, termExpression, incrementStatement) \
- PAIRWISE_SUM_8_TERMS (AccumulatorType, accumulator, termExpression, incrementStatement) \
- { \
- PAIRWISE_SUM_8_TERMS (AccumulatorType, _r5, termExpression, incrementStatement) \
- accumulator += _r5; \
- }
- #define PAIRWISE_SUM_32_TERMS(AccumulatorType, accumulator, termExpression, incrementStatement) \
- PAIRWISE_SUM_16_TERMS (AccumulatorType, accumulator, termExpression, incrementStatement) \
- { \
- PAIRWISE_SUM_16_TERMS (AccumulatorType, _r6, termExpression, incrementStatement) \
- accumulator += _r6; \
- }
- /*
- (The difference between the variable names "_r2", "_r3" and so on is not strictly needed,
- but making them the same would lead the compiler to issue warnings about shadowed variables.)
- */
- /*
- ## 11. IMPLEMENTATION OF PAIRWISE ADDITION: HIGH POWERS OF 2
-
- Higher powers of 2 than 2^5 (= 32) go on a stack. We sum 64 values at each stroke,
- and this requires a fixed formula for these 64 terms:
- */
- #define PAIRWISE_SUM_64_TERMS(AccumulatorType, accumulator, termExpression, incrementStatement) \
- PAIRWISE_SUM_32_TERMS (AccumulatorType, accumulator, termExpression, incrementStatement) \
- { \
- PAIRWISE_SUM_32_TERMS (AccumulatorType, _r7, termExpression, incrementStatement) \
- accumulator += _r7; \
- }
- #define PAIRWISE_SUM_128_TERMS(AccumulatorType, accumulator, termExpression, incrementStatement) \
- PAIRWISE_SUM_64_TERMS (AccumulatorType, accumulator, termExpression, incrementStatement) \
- { \
- PAIRWISE_SUM_64_TERMS (AccumulatorType, _r8, termExpression, incrementStatement) \
- accumulator += _r8; \
- }
- #define PAIRWISE_SUM_256_TERMS(AccumulatorType, accumulator, termExpression, incrementStatement) \
- PAIRWISE_SUM_128_TERMS (AccumulatorType, accumulator, termExpression, incrementStatement) \
- { \
- PAIRWISE_SUM_128_TERMS (AccumulatorType, _r9, termExpression, incrementStatement) \
- accumulator += _r9; \
- }
- /*
- A generalization about the timing of the summations in all the above macros
- is that r(i+1) is added to r(i) precisely when r(i+1) contains the same
- number of terms as r(i). This criterion for collapsing the partial sums
- is also used in the stack logic below.
- */
- #define PAIRWISE_SUM(AccumulatorType, sumVariableName, CounterType, sizeExpression, \
- initializeStatement, termExpression, incrementStatement) \
- \
- AccumulatorType sumVariableName = 0.0; \
- {/* scope */ \
- initializeStatement; \
- CounterType _n = sizeExpression; \
- if (_n & 1) { \
- PAIRWISE_SUM_1_TERM (AccumulatorType, _partialSum, termExpression, incrementStatement) \
- sumVariableName += _partialSum; \
- } \
- if (_n & 2) { \
- PAIRWISE_SUM_2_TERMS (AccumulatorType, _partialSum, termExpression, incrementStatement) \
- sumVariableName += _partialSum; \
- } \
- if (_n & 4) { \
- PAIRWISE_SUM_4_TERMS (AccumulatorType, _partialSum, termExpression, incrementStatement) \
- sumVariableName += _partialSum; \
- } \
- if (_n & 8) { \
- PAIRWISE_SUM_8_TERMS (AccumulatorType, _partialSum, termExpression, incrementStatement) \
- sumVariableName += _partialSum; \
- } \
- if (_n & 16) { \
- PAIRWISE_SUM_16_TERMS (AccumulatorType, _partialSum, termExpression, incrementStatement) \
- sumVariableName += _partialSum; \
- } \
- if (_n & 32) { \
- PAIRWISE_SUM_32_TERMS (AccumulatorType, _partialSum, termExpression, incrementStatement) \
- sumVariableName += _partialSum; \
- } \
- const int _baseCasePower = 6; /* because the base case is 64 = 2^6 terms */ \
- CounterType _numberOfBaseCases = _n >> _baseCasePower; \
- if (_numberOfBaseCases != 0) { \
- /* */ \
- /* The value of _powers [0] stays at 0, to denote the bottom of the stack. */ \
- /* The maximum value of _powers [1] should be 62, which denotes that 2^62 terms */ \
- /* have been accumulated into _partialSumStack [1]. This the maximum, */ \
- /* because _n can be at most 2^63-1 (assuming CounterType is 64 bits). */ \
- /* The maximum value of _powers [2] should then be 61. */ \
- /* The maximum value of _powers [3] should be 60. */ \
- /* ... */ \
- /* The maximum value of _powers [57] should be 6. It ends there, because */ \
- /* 2^6 is the granularity with which base case sums are put on the stack. */ \
- /* The maximum value of _powers [58] should also be 6, */ \
- /* because this can be the situation just before collapsing the top of the stack. */ \
- /* However, if the whole stack is filled up like this, the actual number of */ \
- /* terms is already 2^63 (i.e. 2^62 + 2^61 + 2^60 + ... 2^6 + 2^6). Therefore, we */ \
- /* need one element less, so the highest index of _powers [] should be 57. */ \
- /* For 32-bit counters, this highest index is 25. */ \
- /* */ \
- const int _numberOfBitsInCounterType = 8 * sizeof (CounterType); /* 64 or 32 */ \
- const int _highestIndex = _numberOfBitsInCounterType - 1 - _baseCasePower; \
- AccumulatorType _partialSumStack [1 + _highestIndex]; /* 8 bytes too many, but better code */ \
- unsigned char _powers [1 + _highestIndex]; \
- _powers [0] = 0; \
- int _stackPointer = 0; \
- for (CounterType _ipart = 1; _ipart <= _numberOfBaseCases; _ipart ++) { \
- /* */ \
- /* Compute the sum of the next 64 data points. */ \
- /* */ \
- PAIRWISE_SUM_64_TERMS (AccumulatorType, _partialSum, termExpression, incrementStatement) \
- /* */ \
- /* Put this sum on top of the stack. */ \
- /* */ \
- _partialSumStack [++ _stackPointer] = _partialSum; \
- _powers [_stackPointer] = _baseCasePower; \
- /* */ \
- /* The collapse criterion: */ \
- /* */ \
- while (_powers [_stackPointer] == _powers [_stackPointer - 1]) { \
- _partialSumStack [_stackPointer - 1] += _partialSumStack [_stackPointer]; \
- _powers [-- _stackPointer] += 1; \
- } \
- } \
- /* */ \
- /* Add all the elements of the stack, starting at the top. */ \
- /* */ \
- while (_stackPointer > 0) { \
- sumVariableName += _partialSumStack [_stackPointer --]; \
- } \
- } \
- }
- /*
- Note that we don't do the usual trick with `do {...} while (0)` that would allow you to add
- a semicolon after the `PAIRWISE_SUM()` call. This is to prevent the suggestion that the macro
- constitutes a single statement. The macro contains a sequence of two things: the definition
- of `sumVariableName` and a long block that usually changes the value of `sumVariableName`.
- Hence, the macro cannot be used as a single statement
- and e.g. has to be bracketed if used in an `else` clause.
- You are therefore advised to call `PAIRWISE_SUM()` without appending the misleading semicolon.
- */
- /*
- ## 12. SOME LESS GOOD SUMMATION ALGORITHMS
-
- The summation algorithm (at least for computing the mean) in the statistics software R
- is two-loop summation. This is fairly precise, but very slow:
- */
- #define TWO_LOOP_SUM(AccumulatorType, sumVariableName, CounterType, sizeExpression, \
- initializeStatement, termExpression, incrementStatement) \
- \
- AccumulatorType sumVariableName = 0.0; \
- {/* scope */ \
- CounterType _n = sizeExpression; \
- {/* scope */ \
- initializeStatement; \
- for (CounterType _i = 1; _i <= _n; _i ++) { \
- sumVariableName += termExpression; \
- incrementStatement; \
- } \
- } \
- AccumulatorType _mean = sumVariableName / _n; \
- {/* scope */ \
- sumVariableName = 0.0; \
- initializeStatement; \
- for (CounterType _i = 1; _i <= _n; _i ++) { \
- sumVariableName += (termExpression) - _mean; \
- incrementStatement; \
- } \
- sumVariableName += _mean * _n; \
- } \
- }
- /*
- Another one is the Kahan algorithm. Its precision is similar to that of pairwise summation,
- but it is extremely slow:
- */
- #define KAHAN_SUM(AccumulatorType, sumVariableName, CounterType, sizeExpression, \
- initializeStatement, termExpression, incrementStatement) \
- \
- AccumulatorType sumVariableName = 0.0; \
- {/* scope */ \
- initializeStatement; \
- CounterType _n = sizeExpression; \
- AccumulatorType _correction = 0.0; \
- for (CounterType _i = 1; _i <= _n; _i ++) { \
- AccumulatorType _correctedTerm = (termExpression) - _correction; \
- AccumulatorType _newSum = sumVariableName + _correctedTerm; \
- _correction = (_newSum - sumVariableName) - _correctedTerm; \
- sumVariableName = _newSum; \
- incrementStatement; \
- } \
- }
- /* End of file PAIRWISE_SUM.h */
- #endif
|