123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900 |
- /* arith07.c Copyright (C) 1990-2002 Codemist Ltd */
- /*
- * Arithmetic functions. negation plus a load of Common Lisp things
- * for support of complex numbers.
- *
- */
- /*
- * This code may be used and modified, and redistributed in binary
- * or source form, subject to the "CCL Public License", which should
- * accompany it. This license is a variant on the BSD license, and thus
- * permits use of code derived from this in either open and commercial
- * projects: but it does require that updates to this code be made
- * available back to the originators of the package.
- * Before merging other code in with this or linking this code
- * with other packages or libraries please check that the license terms
- * of the other material are compatible with those of this.
- */
- /* Signature: 0a772466 08-Apr-2002 */
- #include <stdarg.h>
- #include <string.h>
- #include <ctype.h>
- #include <math.h>
- #include "machine.h"
- #include "tags.h"
- #include "cslerror.h"
- #include "externs.h"
- #include "arith.h"
- #ifdef TIMEOUT
- #include "timeout.h"
- #endif
- Lisp_Object copyb(Lisp_Object a)
- /*
- * copy a bignum.
- */
- {
- Lisp_Object b, nil;
- int32 len = bignum_length(a), i;
- push(a);
- b = getvector(TAG_NUMBERS, TYPE_BIGNUM, len);
- pop(a);
- errexit();
- len = (len-CELL)/4;
- for (i=0; i<len; i++)
- bignum_digits(b)[i] = bignum_digits(a)[i];
- return b;
- }
- Lisp_Object negateb(Lisp_Object a)
- /*
- * Negate a bignum. Note that negating the 1-word bignum
- * value of 0x08000000 will produce a fixnum as a result,
- * which might confuse the caller... in a similar way negating
- * the value -0x40000000 will need to promote from a one-word
- * bignum to a 2-word bignum. How messy just for negation!
- */
- {
- Lisp_Object b, nil;
- int32 len = bignum_length(a), i, carry;
- if (len == CELL+4) /* one-word bignum - do specially */
- { len = -(int32)bignum_digits(a)[0];
- if (len == fix_mask) return fixnum_of_int(len);
- else if (len == 0x40000000) return make_two_word_bignum(0, len);
- else return make_one_word_bignum(len);
- }
- push(a);
- b = getvector(TAG_NUMBERS, TYPE_BIGNUM, len);
- pop(a);
- errexit();
- len = (len-CELL)/4-1;
- carry = -1;
- for (i=0; i<len; i++)
- { carry = clear_top_bit(~bignum_digits(a)[i]) + top_bit(carry);
- bignum_digits(b)[i] = clear_top_bit(carry);
- }
- /*
- * Handle the top digit separately since it is signed.
- */
- carry = ~bignum_digits(a)[i] + top_bit(carry);
- if (!signed_overflow(carry))
- {
- /*
- * If the most significant word ends up as -1 then I just might
- * have 0x40000000 in the next word down and so I may need to shrink
- * the number. Since I handled 1-word bignums specially I have at
- * least two words to deal with here.
- */
- if (carry == -1 && (bignum_digits(b)[i-1] & 0x40000000) != 0)
- { bignum_digits(b)[i-1] |= ~0x7fffffff;
- numhdr(b) -= pack_hdrlength(1);
- #ifdef ADDRESS_64
- if ((i & 1) != 0) bignum_digits(b)[i] = 0;
- else bignum_digits(b)[i] = make_bighdr(2);
- #else
- if ((i & 1) == 0) bignum_digits(b)[i] = 0;
- else bignum_digits(b)[i] = make_bighdr(2);
- #endif
- }
- else bignum_digits(b)[i] = carry; /* no shrinking needed */
- return b;
- }
- /*
- * Here I have overflow: this can only happen when I negate a number
- * that started off with 0xc0000000 in the most significant digit,
- * and I have to pad a zero word onto the front.
- */
- bignum_digits(b)[i] = clear_top_bit(carry);
- return lengthen_by_one_bit(b, carry);
- }
- /*
- * generic negation
- */
- Lisp_Object negate(Lisp_Object a)
- {
- #ifdef COMMON
- Lisp_Object nil; /* needed for errexit() */
- #endif
- switch ((int)a & TAG_BITS)
- {
- case TAG_FIXNUM:
- { int32 aa = -int_of_fixnum(a);
- /*
- * negating the number -#x8000000 (which is a fixnum) yields a value
- * which just fails to be a fixnum.
- */
- if (aa != 0x08000000) return fixnum_of_int(aa);
- else return make_one_word_bignum(aa);
- }
- #ifdef COMMON
- case TAG_SFLOAT:
- { Float_union aa;
- aa.i = a - TAG_SFLOAT;
- aa.f = (float) (-aa.f);
- return (aa.i & ~(int32)0xf) + TAG_SFLOAT;
- }
- #endif
- case TAG_NUMBERS:
- { int32 ha = type_of_header(numhdr(a));
- switch (ha)
- {
- case TYPE_BIGNUM:
- return negateb(a);
- #ifdef COMMON
- case TYPE_RATNUM:
- { Lisp_Object n = numerator(a),
- d = denominator(a);
- push(d);
- n = negate(n);
- pop(d);
- errexit();
- return make_ratio(n, d);
- }
- case TYPE_COMPLEX_NUM:
- { Lisp_Object r = real_part(a),
- i = imag_part(a);
- push(i);
- r = negate(r);
- pop(i);
- errexit();
- push(r);
- i = negate(i);
- pop(r);
- errexit();
- return make_complex(r, i);
- }
- #endif
- default:
- return aerror1("bad arg for minus", a);
- }
- }
- case TAG_BOXFLOAT:
- { double d = float_of_number(a);
- return make_boxfloat(-d, type_of_header(flthdr(a)));
- }
- default:
- return aerror1("bad arg for minus", a);
- }
- }
- /*****************************************************************************/
- /*** Transcendental functions etcetera. ***/
- /*****************************************************************************/
- /*
- * Much of the code here is extracted from the portable Fortran library
- * used by Codemist with its Fortran compiler.
- */
- /*
- * The object of the following macro is to adjust the floating point
- * variables concerned so that the more significant one can be squared
- * with NO LOSS OF PRECISION. It is only used when there is no danger
- * of over- or under-flow.
- *
- * This code is NOT PORTABLE but can be modified for use elsewhere
- * It should, however, serve for IEEE and IBM FP formats.
- */
- #define _fp_normalize(high, low) \
- { double temp; /* access to representation */ \
- temp = high; /* take original number */ \
- ((int32 *)&temp)[current_fp_rep & FP_WORD_ORDER] = 0; \
- /* make low part of mantissa 0 */ \
- low += (high - temp); /* add into low-order result */ \
- high = temp; \
- }
- #ifdef COMMON
- double MS_CDECL Cabs(Complex z)
- {
- /*
- * Obtain the absolute value of a complex number - note that the main
- * agony here is in ensuring that neither overflow nor underflow can
- * wreck the calculation. Given ideal arithmetic the sum could be carried
- * through as just sqrt(x^2 + y^2).
- */
- double x = z.real, y = z.imag;
- double scale;
- int n1, n2;
- if (x==0.0) return fabs(y);
- else if (y==0.0) return fabs(x);
- (void)frexp(x, &n1);
- (void)frexp(y, &n2);
- /* The exact range of values returned by frexp does not matter here */
- if (n2>n1) n1 = n2;
- /* n1 is now the exponent of the larger (in absolute value) of x, y */
- scale = ldexp(1.0, n1); /* can not be 0.0 */
- x /= scale;
- y /= scale;
- /* The above scaling operation introduces no rounding error (since the */
- /* scale factor is exactly a power of 2). It reduces the larger of x, y */
- /* to be somewhere near 1.0 so overflow in x*x+y*y is impossible. It is */
- /* still possible that one of x*x and y*y will underflow (but not both) */
- /* but this is harmless. */
- return scale * sqrt(x*x + y*y);
- }
- Complex MS_CDECL Ccos(Complex z)
- {
- double x = z.real, y = z.imag;
- /*
- * cos(x + iy) = cos(x)*cosh(y) - i sin(x)*sinh(y)
- * For smallish y this can be used directly. For |y| > 50 I will
- * compute sinh and cosh as just +/- exp(|y|)/2
- */
- double s = sin(x), c = cos(x);
- double absy = fabs(y);
- if (absy <= 50.0)
- { double sh = sinh(y), ch = cosh(y);
- z.real = c*ch;
- z.imag = - s*sh;
- return z;
- }
- else
- {
- double w;
- int n = _reduced_exp(absy, &w) - 1;
- z.real = ldexp(c*w, n);
- if (y < 0.0) z.imag = ldexp(s*w, n);
- else z.imag = ldexp(-s*w, n);
- return z;
- }
- }
- static double reduced_power(double a, int n)
- {
- /*
- * Compute (1 + a)^n - 1 avoiding undue roundoff error.
- * Assumes n >= 1 on entry and that a is small.
- */
- if (n == 1) return a;
- { double d = reduced_power(a, n/2);
- d = (2.0 + d)*d;
- if (n & 1) d += (1.0 + d)*a;
- return d;
- }
- }
- /*
- * The following values are included for documentation purposes - they
- * give the largest args that can be given to exp() without leading to
- * overflow on IBM mainframe and IEEE machines.
- * #ifdef IBMFLOAT
- * #define _exp_arg_limit 174.673089501106208
- * #else
- * #define _exp_arg_limit 709.78271289338397
- * #endif
- * Note that in any case exp(50.0) will not overflow (it is only 5.2e21),
- * so it can be evaluated the simple direct way.
- */
- int _reduced_exp(double x, double *r)
- {
- /*
- * (*r) = exp(x)/2^n; return n;
- * where n will be selected so that *r gets set to a fairly small value
- * (precise range of r unimportant provided it will be WELL away from
- * chances of overflow, even when exp(x) would actually overflow). This
- * function may only be called with argument x that is positive and
- * large enough that n will end up satisfying n>=1. The coding here
- * will ensure that if x>4.0, and in general the use of this function
- * will only be for x > 50.
- * For IBM hardware it would be good to be able to control the value
- * of n mod 4, maybe, to help counter wobbling precision. This is not
- * done here.
- */
- int n;
- double f;
- n = (int)(x / 7.625 + 0.5);
- /*
- * 7.625 = 61/8 and is expected to have an exact floating point
- * representation here, so f is computed without any rounding error.
- * (do I need something like the (x - 0.5) - 0.5 trick here?)
- */
- f = exp(x - 7.625*(double)n);
- /*
- * the magic constant is ((exp(61/8) / 2048) - 1) and it arises because
- * 61/88 is a decent rational approximation to log(2), hence exp(61/8)
- * is almost 2^11. Thus I compute exp(x) as
- * 2^(11*n) * (exp(61/8)/2^11)^n * exp(f)
- * The first factor is exact, the second is (1+e)^n where e is small and
- * n is an integer, so can be computer accurately, and the residue f at the
- * end is small enough not to give over-bad trouble.
- * The numeric constant given here was calculated with the REDUCE 3.3
- * bigfloat package.
- */
- #define _e61q8 3.81086435594567676751e-4
- *r = reduced_power(_e61q8, n)*f + f;
- #undef _e61q8
- return 11*n;
- }
- Complex MS_CDECL Cexp(Complex z)
- {
- double x = z.real, y = z.imag;
- /*
- * value is exp(x)*(cos(y) + i sin(y)) but have care with overflow
- * Here (and throughout the complex library) there is an opportunity
- * to save time by computing sin(y) and cos(y) together. Since this
- * code is (to begin with) to sit on top of an arbitrary C library,
- * perhaps with hardware support for the calculation of real-valued
- * trig functions I am not going to try to realise this saving.
- */
- double s = sin(y), c = cos(y);
- /*
- * if x > 50 I will use a cautious sceme which computes exp(x) with
- * its (binary) exponent separated. Note that 50.0 is chosen as a
- * number noticably smaller than _exp_arg_limit (exp(50) = 5.18e21),
- * but is not a critical very special number.
- */
- if (x <= 50.0) /* includes x < 0.0, of course */
- { double w = exp(x);
- z.real = w*c;
- z.imag = w*s;
- return z;
- }
- else
- { double w;
- int n = _reduced_exp(x, &w);
- z.real = ldexp(w*c, n);
- z.imag = ldexp(w*s, n);
- return z;
- }
- }
- Complex MS_CDECL Cln(Complex z)
- {
- double x = z.real, y = z.imag;
- /*
- * if x and y are both very large then cabs(z) may be out of range
- * even though log or if is OK. Thus it is necessary to perform an
- * elaborate scaled calculation here, and not just
- * z.real = log(cabs(z));
- */
- double scale, r;
- int n1, n2;
- if (x==0.0) r = log(fabs(y));
- else if (y==0.0) r = log(fabs(x));
- else
- {
- (void)frexp(x, &n1);
- (void)frexp(y, &n2);
- /* The exact range of values returned by frexp does not matter here */
- if (n2>n1) n1 = n2;
- scale = ldexp(1.0, n1);
- x /= scale;
- y /= scale;
- r = log(scale) + 0.5*log(x*x + y*y);
- }
- z.real = r;
- /*
- * The C standard is not very explicit about the behaviour of atan2(0.0, -n)
- * while for Fortran it is necessary that this returns +pi not -pi. Hence
- * with extreme caution I put a special test here.
- */
- if (y == 0.0)
- if (x < 0.0) z.imag = _pi;
- else z.imag = 0.0;
- else z.imag = atan2(y, x);
- return z;
- }
- /*
- * Complex raising to a power. This seems to be pretty nasty
- * to get right, and the code includes extra high precision variants
- * on atan() and log(). Further refinements wrt efficiency may be
- * possible later on.
- * This code has been partially tested, and seems to be uniformly
- * better than using just a**b = exp(b*log(a)), but much more careful
- * study is needed before it can possibly be claimed that it is
- * right in the sense of not throwing away accuracy when it does not
- * have to. I also need to make careful checks to verify that the
- * correct (principal) value is computed.
- */
- /*
- * The next function is used after arithmetic has been done on extra-
- * precision numbers so that the relationship between high and low parts
- * is no longer known. Re-instate it.
- */
- #define _two_minus_25 2.98023223876953125e-8 /* 2^(-25) */
- static double fp_add(double a, double b, double *lowres)
- {
- /* Result is the high part of a+b, with the low part assigned to *lowres */
- double absa, absb;
- if (a >= 0.0) absa = a; else absa = -a;
- if (b >= 0.0) absb = b; else absb = -b;
- if (absa < absb)
- { double t = a; a = b; b = t;
- }
- /* Now a is the larger (in absolute value) of the two numbers */
- if (absb > absa * _two_minus_25)
- { double al = 0.0, bl = 0.0;
- /*
- * If the exponent difference beweeen a and b is no more than 25 then
- * I can add the top part (20 or 24 bits) of a to the top part of b
- * without going beyond the 52 or 56 bits that a full mantissa can hold.
- */
- _fp_normalize(a, al);
- _fp_normalize(b, bl);
- a = a + b; /* No rounding needed here */
- b = al + bl;
- if (a == 0.0)
- { a = b;
- b = 0.0;
- }
- }
- /*
- * The above step leaves b small wrt the value in a (unless a+b led
- * to substantial cancellation of leading digits), but leaves the high
- * part a with bits everywhere. Force low part of a to zero.
- */
- { double al = 0.0;
- _fp_normalize(a, al);
- b = b + al;
- }
- if (a >= 0.0) absa = a; else absa = -a;
- if (b >= 0.0) absb = b; else absb = -b;
- if (absb > absa * _two_minus_25)
- /*
- * If on input a is close to -b, then a+b is close to zero. In this
- * case the exponents of a abd b matched, and so earlier calculations
- * have all been done exactly. Go around again to split residue into
- * high and low parts.
- */
- { double al = 0.0, bl = 0.0;
- _fp_normalize(b, bl);
- a = a + b;
- _fp_normalize(a, al);
- b = bl + al;
- }
- *lowres = b;
- return a;
- }
- #undef _two_minus_25
- static void extended_atan2(double b, double a, double *thetah, double *thetal)
- {
- int octant;
- double rh, rl, thh, thl;
- /*
- * First reduce the argument to the first octant (i.e. a, b both +ve,
- * and b <= a).
- */
- if (b < 0.0)
- { octant = 4;
- a = -a;
- b = -b;
- }
- else octant = 0;
- if (a < 0.0)
- { double t = a;
- octant += 2;
- a = b;
- b = -t;
- }
- if (b > a)
- { double t = a;
- octant += 1;
- a = b;
- b = t;
- }
- { static struct { double h; double l;} _atan[] = {
- /*
- * The table here gives atan(n/16) in 1.5-precision for n=0..16
- * Note that all the magic numbers used in this file were calculated
- * using the REDUCE bigfloat package, and all the 'exact' parts have
- * a denominator of at worst 2^26 and so are expected to have lots
- * of trailing zero bits in their floating point representation.
- */
- { 0.0, 0.0 },
- { 0.06241881847381591796875, -0.84778585694947708870e-8 },
- { 0.124355018138885498046875, -2.35921240630155201508e-8 },
- { 0.185347974300384521484375, -2.43046897565983490387e-8 },
- { 0.2449786663055419921875, -0.31786778380154175187e-8 },
- { 0.302884876728057861328125, -0.83530864557675689054e-8 },
- { 0.358770668506622314453125, 0.17639499059427950639e-8 },
- { 0.412410438060760498046875, 0.35366268088529162896e-8 },
- { 0.4636476039886474609375, 0.50121586552767562314e-8 },
- { 0.512389481067657470703125, -2.07569197640365239794e-8 },
- { 0.558599293231964111328125, 2.21115983246433832164e-8 },
- { 0.602287352085113525390625, -0.59501493437085023057e-8 },
- { 0.643501102924346923828125, 0.58689374629746842287e-8 },
- { 0.6823165416717529296875, 1.32029951485689299817e-8 },
- { 0.71882998943328857421875, 1.01883359311982641515e-8 },
- { 0.75315129756927490234375, -1.66070805128190106297e-8 },
- { 0.785398185253143310546875, -2.18556950009312141541e-8 }
- };
- int k = (int)(16.0*(b/a + 0.03125)); /* 0 to 16 */
- double kd = (double)k/16.0;
- double ah = a, al = 0.0,
- bh = b, bl = 0.0,
- ch, cl, q, q2;
- _fp_normalize(ah, al);
- _fp_normalize(bh, bl);
- ch = bh - ah*kd; cl = bl - al*kd;
- ah = ah + bh*kd; al = al + bl*kd;
- bh = ch; bl = cl;
- /* Now |(a/b)| <= 1/32 */
- ah = fp_add(ah, al, &al); /* Re-normalise */
- bh = fp_add(bh, bl, &bl);
- /* Compute approximation to b/a */
- rh = (bh + bl)/(ah + al); rl = 0.0;
- _fp_normalize(rh, rl);
- bh -= ah*rh; bl -= al*rh;
- rl = (bh + bl)/(ah + al); /* Quotient now formed */
- /*
- * Now it is necessary to compute atan(q) to one-and-a-half precision.
- * Since |q| < 1/32 I will leave just q as the high order word of
- * the result and compute atan(q)-q as a single precision value. This
- * gives about 16 bits accuracy beyond regular single precision work.
- */
- q = rh + rl;
- q2 = q*q;
- /* The expansion the follows could be done better using a minimax poly */
- rl -= q*q2*(0.33333333333333333333 -
- q2*(0.20000000000000000000 -
- q2*(0.14285714285714285714 -
- q2*(0.11111111111111111111 -
- q2* 0.09090909090909090909))));
- /* OK - now (rh, rl) is atan(reduced b/a). Need to add on atan(kd) */
- rh += _atan[k].h;
- rl += _atan[k].l;
- }
- /*
- * The following constants give high precision versions of pi and pi/2,
- * and the high partwords (p2h and pih) have lots of low order zero bits
- * in their binary representation. Expect (=require) that the arithmetic
- * that computes thh is done without introduced rounding error.
- */
- #define _p2h 1.57079632580280303955078125
- #define _p2l 9.92093579680540441639751e-10
- #define _pih 3.14159265160560607910156250
- #define _pil 1.984187159361080883279502e-9
- switch (octant)
- {
- default:
- case 0: thh = rh; thl = rl; break;
- case 1: thh = _p2h - rh; thl = _p2l - rl; break;
- case 2: thh = _p2h + rh; thl = _p2l + rl; break;
- case 3: thh = _pih - rh; thl = _pil - rl; break;
- case 4: thh = -_pih + rh; thl = -_pil + rl; break;
- case 5: thh = -_p2h - rh; thl = -_p2l - rl; break;
- case 6: thh = -_p2h + rh; thl = -_p2l + rl; break;
- case 7: thh = -rh; thl = -rl; break;
- }
- #undef _p2h
- #undef _p2l
- #undef _pih
- #undef _pil
- *thetah = fp_add(thh, thl, thetal);
- }
- static void extended_log(int k, double a, double b,
- double *logrh, double *logrl)
- {
- /*
- * If we had exact arithmetic this procedure could be:
- * k*log(2) + 0.5*log(a^2 + b^2)
- */
- double al = 0.0, bl = 0.0, all = 0.0, bll = 0.0, c, ch, cl, cll;
- double w, q, qh, ql, rh, rl;
- int n;
- /*
- * First (a^2 + b^2) is calculated, using extra precision.
- * Because any rounding at this stage can lead to bad errors in
- * the power that I eventually want to compute, I use 3-word
- * arithmetic here, and with the version of _fp_normalize given
- * above and IEEE or IBM370 arithmetic this part of the
- * computation is exact.
- */
- _fp_normalize(a, al); _fp_normalize(al, all);
- _fp_normalize(b, bl); _fp_normalize(bl, bll);
- ch = a*a + b*b;
- cl = 2.0*(a*al + b*bl);
- cll = (al*al + bl*bl) +
- all*(2.0*(a + al) + all) +
- bll*(2.0*(b + bl) + bll);
- _fp_normalize(ch, cl);
- _fp_normalize(cl, cll);
- c = ch + (cl + cll); /* single precision approximation */
- /*
- * At this stage the scaling of the input value will mean that we
- * have 0.25 <= c <= 2.0
- *
- * Now rewrite things as
- * (2*k + n)*log(s) + 0.5*log((a^2 + b^2)/2^n))
- * where s = sqrt(2)
- * and where the arg to the log is in sqrt(0.5), sqrt(2)
- */
- #define _sqrt_half 0.70710678118654752440
- #define _sqrt_two 1.41421356237309504880
- k = 2*k;
- while (c < _sqrt_half)
- { k -= 1;
- ch *= 2.0;
- cl *= 2.0;
- cll *= 2.0;
- c *= 2.0;
- }
- while (c > _sqrt_two)
- { k += 1;
- ch *= 0.5;
- cl *= 0.5;
- cll *= 0.5;
- c *= 0.5;
- }
- #undef _sqrt_half
- #undef _sqrt_two
- n = (int)(16.0/c + 0.5);
- w = (double)n / 16.0;
- ch *= w;
- cl *= w;
- cll *= w; /* Now |c-1| < 0.04317 */
- ch = (ch - 0.5) - 0.5;
- ch = fp_add(ch, cl, &cl);
- cl = cl + cll;
- /*
- * (ch, cl) is now the reduced argument ready for calculating log(1+c),
- * and now that the reduction is over I can drop back to the use of just
- * two doubleprecision values to represent c.
- */
- c = ch + cl;
- qh = c / (2.0 + c);
- ql = 0.0;
- _fp_normalize(qh, ql);
- ql = ((ch - qh*(2.0 + ch)) + cl - qh*cl) / (2.0 + c);
- /* (qh, ql) is now c/(2.0 + c) */
- rh = qh; /* 18 bits bigger than low part will end up */
- q = qh + ql;
- w = q*q;
- rl = ql + q*w*(0.33333333333333333333 +
- w*(0.20000000000000000000 +
- w*(0.14285714285714285714 +
- w*(0.11111111111111111111 +
- w*(0.09090909090909090909)))));
- /*
- * (rh, rl) is now atan(c) correct to double precision plus about 18 bits.
- */
- { double temp;
- static struct { double h; double l; } _log_table[] = {
- /*
- * The following values are (in extra precision) -log(n/16)/2 for n
- * in the range 11 to 23 (i.e. roughly over sqrt(2)/2 to sqrt(2))
- */
- { 0.1873466968536376953125, 2.786706765149099245e-8 },
- { 0.1438410282135009765625, 0.801238948715710950e-8 },
- { 0.103819668292999267578125, 1.409612298322959552e-8 },
- { 0.066765725612640380859375, -2.930037906928620319e-8 },
- { 0.0322692394256591796875, 2.114312640614896196e-8 },
- { 0.0, 0.0 },
- { -0.0303122997283935546875, -1.117982386660280307e-8 },
- { -0.0588915348052978515625, 1.697710612429310295e-8 },
- { -0.08592510223388671875, -2.622944289242004947e-8 },
- { -0.111571788787841796875, 1.313073691899185245e-8 },
- { -0.135966837406158447265625, -2.033566243215020975e-8 },
- { -0.159226894378662109375, 2.881939480146987639e-8 },
- { -0.18145275115966796875, 0.431498374218108783e-8 }};
- rh = fp_add(rh, _log_table[n-11].h, &temp);
- rl = temp + _log_table[n-11].l + rl;
- }
- #define _exact_part_logroot2 0.3465735912322998046875
- #define _approx_part_logroot2 (-9.5232714997888393927e-10)
- /* Multiply this by k and add it in */
- { double temp, kd = (double)k;
- rh = fp_add(rh, kd*_exact_part_logroot2, &temp);
- rl = rl + temp + kd*_approx_part_logroot2;
- }
- #undef _exact_part_logroot2
- #undef _approx_part_logroot2
- *logrh = rh;
- *logrl = rl;
- return;
- }
- Complex MS_CDECL Cpow(Complex z1, Complex z2)
- {
- double a = z1.real, b = z1.imag,
- c = z2.real, d = z2.imag;
- int k, m, n;
- double scale;
- double logrh, logrl, thetah, thetal;
- double r, i, rh, rl, ih, il, clow, dlow, q;
- double cw, sw, cost, sint;
- if (b == 0.0 && d == 0.0 && a >= 0.0)/* Simple case if both args are real */
- { z1.real = pow(a, c);
- z1.imag = 0.0;
- return z1;
- }
- /*
- * Start by scaling z1 by dividing out a power of 2 so that |z1| is
- * fairly close to 1
- */
- if (a == 0.0)
- { if (b == 0.0) return z1; /* 0.0**anything is really an error */
- /* The exact values returned by frexp do not matter here */
- (void)frexp(b, &k);
- }
- else
- { (void)frexp(a, &k);
- if (b != 0.0)
- { int n;
- (void)frexp(b, &n);
- if (n > k) k = n;
- }
- }
- scale = ldexp(1.0, k);
- a /= scale;
- b /= scale;
- /*
- * The idea next is to express z1 as r*exp(i theta), then z1**z2
- * is exp(z2*log(z1)) and the exponent simplifies to
- * (c*log r - d*theta) + i(c theta + d log r).
- * Note r = scale*sqrt(a*a + b*b) now.
- * The argument for exp() must be computed to noticably more than
- * regular precision, since otherwise exp() greatly magnifies the
- * error in the real part (if it is large and positive) and range
- * reduction in the imaginary part can equally lead to accuracy
- * problems. Neither c nor d can usefully be anywhere near the
- * extreme range of floating point values, so I will not even try
- * to scale them, thus I will end up happy(ish) if I can compute
- * atan2(b, a) and log(|z1|) in one-and-a-half precision form.
- * It would be best to compute theta in units of pi/4 rather than in
- * raidians, since then the case of z^n (integer n) with arg(z) an
- * exact multiple of pi/4 would work out better. However at present I
- * just hope that the 1.5-precision working is good enough.
- */
- extended_atan2(b, a, &thetah, &thetal);
- extended_log(k, a, b, &logrh, &logrl);
- /* Normalise all numbers */
- clow = 0.0;
- dlow = 0.0;
- _fp_normalize(c, clow);
- _fp_normalize(d, dlow);
- /* (rh, rl) = c*logr - d*theta; */
- rh = c*logrh - d*thetah; /* No rounding in this computation */
- rl = c*logrl + clow*(logrh + logrl) - d*thetal - dlow*(thetah + thetal);
- /* (ih, il) = c*theta + d*logr; */
- ih = c*thetah + d*logrh; /* No rounding in this computation */
- il = c*thetal + clow*(thetah + thetal) + d*logrl + dlow*(logrh + logrl);
- /*
- * Now it remains to take the exponential of the extended precision
- * value in ((rh, rl), (ih, il)).
- * Range reduce the real part by multiples of log(2), and the imaginary
- * part by multiples of pi/2.
- */
- rh = fp_add(rh, rl, &rl); /* renormalise */
- r = rh + rl; /* Approximate value */
- #define _recip_log_2 1.4426950408889634074
- q = r * _recip_log_2;
- m = (q < 0.0) ? (int)(q - 0.5) : (int)(q + 0.5);
- q = (double)m;
- #undef _recip_log_2
- /*
- * log 2 = 11629080/2^24 - 0.000 00000 19046 54299 95776 78785
- * to reasonable accuracy. It is vital that the exact part be
- * read in as a number with lots of low zero bits, so when it gets
- * multiplied by the integer q there is NO rounding needed.
- */
- #define _exact_part_log2 0.693147182464599609375
- #define _approx_part_log2 (-1.9046542999577678785418e-9)
- rh = rh - q*_exact_part_log2;
- rl = rl - q*_approx_part_log2;
- #undef _exact_part_log2
- #undef _approx_part_log2
- r = exp(rh + rl); /* This should now be accurate enough */
- ih = fp_add(ih, il, &il);
- i = ih + il; /* Approximate value */
- #define _recip_pi_by_2 0.6366197723675813431
- q = i * _recip_pi_by_2;
- n = (q < 0.0) ? (int)(q - 0.5) : (int)(q + 0.5);
- q = (double)n;
- /*
- * pi/2 = 105414357/2^26 + 0.000 00000 09920 93579 68054 04416 39751
- * to reasonable accuracy. It is vital that the exact part be
- * read in as a number with lots of low zero bits, so when it gets
- * multiplied by the integer q there is NO rounding needed.
- */
- #define _exact_part_pi2 1.57079632580280303955078125
- #define _approx_part_pi2 9.92093579680540441639751e-10
- ih = ih - q*_exact_part_pi2;
- il = il - q*_approx_part_pi2;
- #undef _recip_pi_by_2
- #undef _exact_part_pi2
- #undef _approx_part_pi2
- i = ih + il; /* Now accurate enough */
- /*
- * Having done super-careful range reduction I can call the regular
- * sin/cos routines here. If sin/cos could both be computed together
- * that could speed things up a little bit, but by this stage they have
- * not much in common.
- */
- cw = cos(i);
- sw = sin(i);
- switch (n & 3) /* quadrant control */
- {
- default:
- case 0: cost = cw; sint = sw; break;
- case 1: cost = -sw; sint = cw; break;
- case 2: cost = -cw; sint = -sw; break;
- case 3: cost = sw; sint = -cw; break;
- }
- /*
- * Now, at long last, I can assemble the results and return.
- */
- z1.real = ldexp(r*cost, m);
- z1.imag = ldexp(r*sint, m);
- return z1;
- }
- /*
- * End of complex-to-complex-power code.
- */
- #endif
- /* end of arith07.c */
|