arith07.c 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884
  1. /* arith07.c Copyright (C) 1990 Codemist Ltd */
  2. /*
  3. * Arithmetic functions. negation plus a load of Common Lisp things
  4. * for support of complex numbers.
  5. *
  6. * Version 1.2 May 1990.
  7. */
  8. /* Signature: 6048ef3e 31-May-1997 */
  9. #include <stdarg.h>
  10. #include <string.h>
  11. #include <ctype.h>
  12. #include <math.h>
  13. #include "machine.h"
  14. #include "tags.h"
  15. #include "cslerror.h"
  16. #include "externs.h"
  17. #include "arith.h"
  18. #ifdef TIMEOUT
  19. #include "timeout.h"
  20. #endif
  21. Lisp_Object copyb(Lisp_Object a)
  22. /*
  23. * copy a bignum.
  24. */
  25. {
  26. Lisp_Object b, nil;
  27. int32 len = bignum_length(a), i;
  28. push(a);
  29. b = getvector(TAG_NUMBERS, TYPE_BIGNUM, len);
  30. pop(a);
  31. errexit();
  32. len = (len >> 2) - 1;
  33. for (i=0; i<len; i++)
  34. bignum_digits(b)[i] = bignum_digits(a)[i];
  35. return b;
  36. }
  37. Lisp_Object negateb(Lisp_Object a)
  38. /*
  39. * Negate a bignum. Note that negating the 1-word bignum
  40. * value of 0x08000000 will produce a fixnum as a result,
  41. * which might confuse the caller... in a similar way negating
  42. * the value -0x40000000 will need to promote from a one-word
  43. * bignum to a 2-word bignum. How messy just for negation!
  44. */
  45. {
  46. Lisp_Object b, nil;
  47. int32 len = bignum_length(a), i, carry;
  48. if (len == 8) /* one-word bignum - do specially */
  49. { len = -(int32)bignum_digits(a)[0];
  50. if (len == fix_mask) return fixnum_of_int(len);
  51. else if (len == 0x40000000) return make_two_word_bignum(0, len);
  52. else return make_one_word_bignum(len);
  53. }
  54. push(a);
  55. b = getvector(TAG_NUMBERS, TYPE_BIGNUM, len);
  56. pop(a);
  57. errexit();
  58. len = (len >> 2) - 2;
  59. carry = -1;
  60. for (i=0; i<len; i++)
  61. { carry = clear_top_bit(~bignum_digits(a)[i]) + top_bit(carry);
  62. bignum_digits(b)[i] = clear_top_bit(carry);
  63. }
  64. /*
  65. * Handle the top digit separately since it is signed.
  66. */
  67. carry = ~bignum_digits(a)[i] + top_bit(carry);
  68. if (!signed_overflow(carry))
  69. {
  70. /*
  71. * If the most significant word ends up as -1 then I just might
  72. * have 0x40000000 in the next word down and so I may need to shrink
  73. * the number. Since I handled 1-word bignums specially I have at
  74. * least two words to deal with here.
  75. */
  76. if (carry == -1 && (bignum_digits(b)[i-1] & 0x40000000) != 0)
  77. { bignum_digits(b)[i-1] |= ~0x7fffffff;
  78. numhdr(b) -= pack_hdrlength(1);
  79. if ((i & 1) == 0) bignum_digits(b)[i] = 0;
  80. else bignum_digits(b)[i] = make_bighdr(2);
  81. }
  82. else bignum_digits(b)[i] = carry; /* no shrinking needed */
  83. return b;
  84. }
  85. /*
  86. * Here I have overflow: this can only happen when I negate a number
  87. * that started off with 0xc0000000 in the most significant digit,
  88. * and I have to pad a zero word onto the front.
  89. */
  90. bignum_digits(b)[i] = clear_top_bit(carry);
  91. return lengthen_by_one_bit(b, carry);
  92. }
  93. /*
  94. * generic negation
  95. */
  96. Lisp_Object negate(Lisp_Object a)
  97. {
  98. #ifdef COMMON
  99. Lisp_Object nil; /* needed for errexit() */
  100. #endif
  101. switch ((int)a & TAG_BITS)
  102. {
  103. case TAG_FIXNUM:
  104. { int32 aa = -int_of_fixnum(a);
  105. /*
  106. * negating the number -#x8000000 (which is a fixnum) yields a value
  107. * which just fails to be a fixnum.
  108. */
  109. if (aa != 0x08000000) return fixnum_of_int(aa);
  110. else return make_one_word_bignum(aa);
  111. }
  112. #ifdef COMMON
  113. case TAG_SFLOAT:
  114. { Float_union aa;
  115. aa.i = a - TAG_SFLOAT;
  116. aa.f = (float) (-aa.f);
  117. return (aa.i & ~(int32)0xf) + TAG_SFLOAT;
  118. }
  119. #endif
  120. case TAG_NUMBERS:
  121. { int32 ha = type_of_header(numhdr(a));
  122. switch (ha)
  123. {
  124. case TYPE_BIGNUM:
  125. return negateb(a);
  126. #ifdef COMMON
  127. case TYPE_RATNUM:
  128. { Lisp_Object n = numerator(a),
  129. d = denominator(a);
  130. push(d);
  131. n = negate(n);
  132. pop(d);
  133. errexit();
  134. return make_ratio(n, d);
  135. }
  136. case TYPE_COMPLEX_NUM:
  137. { Lisp_Object r = real_part(a),
  138. i = imag_part(a);
  139. push(i);
  140. r = negate(r);
  141. pop(i);
  142. errexit();
  143. push(r);
  144. i = negate(i);
  145. pop(r);
  146. errexit();
  147. return make_complex(r, i);
  148. }
  149. #endif
  150. default:
  151. return aerror1("bad arg for minus", a);
  152. }
  153. }
  154. case TAG_BOXFLOAT:
  155. { double d = float_of_number(a);
  156. return make_boxfloat(-d, type_of_header(flthdr(a)));
  157. }
  158. default:
  159. return aerror1("bad arg for minus", a);
  160. }
  161. }
  162. /*****************************************************************************/
  163. /*** Transcendental functions etcetera. ***/
  164. /*****************************************************************************/
  165. /*
  166. * Much of the code here is extracted from the portable Fortran library
  167. * used by Codemist with its Fortran compiler.
  168. */
  169. /*
  170. * The object of the following macro is to adjust the floating point
  171. * variables concerned so that the more significant one can be squared
  172. * with NO LOSS OF PRECISION. It is only used when there is no danger
  173. * of over- or under-flow.
  174. *
  175. * This code is NOT PORTABLE but can be modified for use elsewhere
  176. * It should, however, serve for IEEE and IBM FP formats.
  177. */
  178. #define _fp_normalize(high, low) \
  179. { double temp; /* access to representation */ \
  180. temp = high; /* take original number */ \
  181. ((int32 *)&temp)[current_fp_rep & FP_WORD_ORDER] = 0; \
  182. /* make low part of mantissa 0 */ \
  183. low += (high - temp); /* add into low-order result */ \
  184. high = temp; \
  185. }
  186. #ifdef COMMON
  187. double MS_CDECL Cabs(Complex z)
  188. {
  189. /*
  190. * Obtain the absolute value of a complex number - note that the main
  191. * agony here is in ensuring that neither overflow nor underflow can
  192. * wreck the calculation. Given ideal arithmetic the sum could be carried
  193. * through as just sqrt(x^2 + y^2).
  194. */
  195. double x = z.real, y = z.imag;
  196. double scale;
  197. int n1, n2;
  198. if (x==0.0) return fabs(y);
  199. else if (y==0.0) return fabs(x);
  200. (void)frexp(x, &n1);
  201. (void)frexp(y, &n2);
  202. /* The exact range of values returned by frexp does not matter here */
  203. if (n2>n1) n1 = n2;
  204. /* n1 is now the exponent of the larger (in absolute value) of x, y */
  205. scale = ldexp(1.0, n1); /* can not be 0.0 */
  206. x /= scale;
  207. y /= scale;
  208. /* The above scaling operation introduces no rounding error (since the */
  209. /* scale factor is exactly a power of 2). It reduces the larger of x, y */
  210. /* to be somewhere near 1.0 so overflow in x*x+y*y is impossible. It is */
  211. /* still possible that one of x*x and y*y will underflow (but not both) */
  212. /* but this is harmless. */
  213. return scale * sqrt(x*x + y*y);
  214. }
  215. Complex MS_CDECL Ccos(Complex z)
  216. {
  217. double x = z.real, y = z.imag;
  218. /*
  219. * cos(x + iy) = cos(x)*cosh(y) - i sin(x)*sinh(y)
  220. * For smallish y this can be used directly. For |y| > 50 I will
  221. * compute sinh and cosh as just +/- exp(|y|)/2
  222. */
  223. double s = sin(x), c = cos(x);
  224. double absy = fabs(y);
  225. if (absy <= 50.0)
  226. { double sh = sinh(y), ch = cosh(y);
  227. z.real = c*ch;
  228. z.imag = - s*sh;
  229. return z;
  230. }
  231. else
  232. {
  233. double w;
  234. int n = _reduced_exp(absy, &w) - 1;
  235. z.real = ldexp(c*w, n);
  236. if (y < 0.0) z.imag = ldexp(s*w, n);
  237. else z.imag = ldexp(-s*w, n);
  238. return z;
  239. }
  240. }
  241. static double reduced_power(double a, int n)
  242. {
  243. /*
  244. * Compute (1 + a)^n - 1 avoiding undue roundoff error.
  245. * Assumes n >= 1 on entry and that a is small.
  246. */
  247. if (n == 1) return a;
  248. { double d = reduced_power(a, n/2);
  249. d = (2.0 + d)*d;
  250. if (n & 1) d += (1.0 + d)*a;
  251. return d;
  252. }
  253. }
  254. /*
  255. * The following values are included for documentation purposes - they
  256. * give the largest args that can be given to exp() without leading to
  257. * overflow on IBM mainframe and IEEE machines.
  258. * #ifdef IBMFLOAT
  259. * #define _exp_arg_limit 174.673089501106208
  260. * #else
  261. * #define _exp_arg_limit 709.78271289338397
  262. * #endif
  263. * Note that in any case exp(50.0) will not overflow (it is only 5.2e21),
  264. * so it can be evaluated the simple direct way.
  265. */
  266. int _reduced_exp(double x, double *r)
  267. {
  268. /*
  269. * (*r) = exp(x)/2^n; return n;
  270. * where n will be selected so that *r gets set to a fairly small value
  271. * (precise range of r unimportant provided it will be WELL away from
  272. * chances of overflow, even when exp(x) would actually overflow). This
  273. * function may only be called with argument x that is positive and
  274. * large enough that n will end up satisfying n>=1. The coding here
  275. * will ensure that if x>4.0, and in general the use of this function
  276. * will only be for x > 50.
  277. * For IBM hardware it would be good to be able to control the value
  278. * of n mod 4, maybe, to help counter wobbling precision. This is not
  279. * done here.
  280. */
  281. int n;
  282. double f;
  283. n = (int)(x / 7.625 + 0.5);
  284. /*
  285. * 7.625 = 61/8 and is expected to have an exact floating point
  286. * representation here, so f is computed without any rounding error.
  287. * (do I need something like the (x - 0.5) - 0.5 trick here?)
  288. */
  289. f = exp(x - 7.625*(double)n);
  290. /*
  291. * the magic constant is ((exp(61/8) / 2048) - 1) and it arises because
  292. * 61/88 is a decent rational approximation to log(2), hence exp(61/8)
  293. * is almost 2^11. Thus I compute exp(x) as
  294. * 2^(11*n) * (exp(61/8)/2^11)^n * exp(f)
  295. * The first factor is exact, the second is (1+e)^n where e is small and
  296. * n is an integer, so can be computer accurately, and the residue f at the
  297. * end is small enough not to give over-bad trouble.
  298. * The numeric constant given here was calculated with the REDUCE 3.3
  299. * bigfloat package.
  300. */
  301. #define _e61q8 3.81086435594567676751e-4
  302. *r = reduced_power(_e61q8, n)*f + f;
  303. #undef _e61q8
  304. return 11*n;
  305. }
  306. Complex MS_CDECL Cexp(Complex z)
  307. {
  308. double x = z.real, y = z.imag;
  309. /*
  310. * value is exp(x)*(cos(y) + i sin(y)) but have care with overflow
  311. * Here (and throughout the complex library) there is an opportunity
  312. * to save time by computing sin(y) and cos(y) together. Since this
  313. * code is (to begin with) to sit on top of an arbitrary C library,
  314. * perhaps with hardware support for the calculation of real-valued
  315. * trig functions I am not going to try to realise this saving.
  316. */
  317. double s = sin(y), c = cos(y);
  318. /*
  319. * if x > 50 I will use a cautious sceme which computes exp(x) with
  320. * its (binary) exponent separated. Note that 50.0 is chosen as a
  321. * number noticably smaller than _exp_arg_limit (exp(50) = 5.18e21),
  322. * but is not a critical very special number.
  323. */
  324. if (x <= 50.0) /* includes x < 0.0, of course */
  325. { double w = exp(x);
  326. z.real = w*c;
  327. z.imag = w*s;
  328. return z;
  329. }
  330. else
  331. { double w;
  332. int n = _reduced_exp(x, &w);
  333. z.real = ldexp(w*c, n);
  334. z.imag = ldexp(w*s, n);
  335. return z;
  336. }
  337. }
  338. Complex MS_CDECL Cln(Complex z)
  339. {
  340. double x = z.real, y = z.imag;
  341. /*
  342. * if x and y are both very large then cabs(z) may be out of range
  343. * even though log or if is OK. Thus it is necessary to perform an
  344. * elaborate scaled calculation here, and not just
  345. * z.real = log(cabs(z));
  346. */
  347. double scale, r;
  348. int n1, n2;
  349. if (x==0.0) r = log(fabs(y));
  350. else if (y==0.0) r = log(fabs(x));
  351. else
  352. {
  353. (void)frexp(x, &n1);
  354. (void)frexp(y, &n2);
  355. /* The exact range of values returned by frexp does not matter here */
  356. if (n2>n1) n1 = n2;
  357. scale = ldexp(1.0, n1);
  358. x /= scale;
  359. y /= scale;
  360. r = log(scale) + 0.5*log(x*x + y*y);
  361. }
  362. z.real = r;
  363. /*
  364. * The C standard is not very explicit about the behaviour of atan2(0.0, -n)
  365. * while for Fortran it is necessary that this returns +pi not -pi. Hence
  366. * with extreme caution I put a special test here.
  367. */
  368. if (y == 0.0)
  369. if (x < 0.0) z.imag = _pi;
  370. else z.imag = 0.0;
  371. else z.imag = atan2(y, x);
  372. return z;
  373. }
  374. /*
  375. * Complex raising to a power. This seems to be pretty nasty
  376. * to get right, and the code includes extra high precision variants
  377. * on atan() and log(). Further refinements wrt efficiency may be
  378. * possible later on.
  379. * This code has been partially tested, and seems to be uniformly
  380. * better than using just a**b = exp(b*log(a)), but much more careful
  381. * study is needed before it can possibly be claimed that it is
  382. * right in the sense of not throwing away accuracy when it does not
  383. * have to. I also need to make careful checks to verify that the
  384. * correct (principal) value is computed.
  385. */
  386. /*
  387. * The next function is used after arithmetic has been done on extra-
  388. * precision numbers so that the relationship between high and low parts
  389. * is no longer known. Re-instate it.
  390. */
  391. #define _two_minus_25 2.98023223876953125e-8 /* 2^(-25) */
  392. static double fp_add(double a, double b, double *lowres)
  393. {
  394. /* Result is the high part of a+b, with the low part assigned to *lowres */
  395. double absa, absb;
  396. if (a >= 0.0) absa = a; else absa = -a;
  397. if (b >= 0.0) absb = b; else absb = -b;
  398. if (absa < absb)
  399. { double t = a; a = b; b = t;
  400. }
  401. /* Now a is the larger (in absolute value) of the two numbers */
  402. if (absb > absa * _two_minus_25)
  403. { double al = 0.0, bl = 0.0;
  404. /*
  405. * If the exponent difference beweeen a and b is no more than 25 then
  406. * I can add the top part (20 or 24 bits) of a to the top part of b
  407. * without going beyond the 52 or 56 bits that a full mantissa can hold.
  408. */
  409. _fp_normalize(a, al);
  410. _fp_normalize(b, bl);
  411. a = a + b; /* No rounding needed here */
  412. b = al + bl;
  413. if (a == 0.0)
  414. { a = b;
  415. b = 0.0;
  416. }
  417. }
  418. /*
  419. * The above step leaves b small wrt the value in a (unless a+b led
  420. * to substantial cancellation of leading digits), but leaves the high
  421. * part a with bits everywhere. Force low part of a to zero.
  422. */
  423. { double al = 0.0;
  424. _fp_normalize(a, al);
  425. b = b + al;
  426. }
  427. if (a >= 0.0) absa = a; else absa = -a;
  428. if (b >= 0.0) absb = b; else absb = -b;
  429. if (absb > absa * _two_minus_25)
  430. /*
  431. * If on input a is close to -b, then a+b is close to zero. In this
  432. * case the exponents of a abd b matched, and so earlier calculations
  433. * have all been done exactly. Go around again to split residue into
  434. * high and low parts.
  435. */
  436. { double al = 0.0, bl = 0.0;
  437. _fp_normalize(b, bl);
  438. a = a + b;
  439. _fp_normalize(a, al);
  440. b = bl + al;
  441. }
  442. *lowres = b;
  443. return a;
  444. }
  445. #undef _two_minus_25
  446. static void extended_atan2(double b, double a, double *thetah, double *thetal)
  447. {
  448. int octant;
  449. double rh, rl, thh, thl;
  450. /*
  451. * First reduce the argument to the first octant (i.e. a, b both +ve,
  452. * and b <= a).
  453. */
  454. if (b < 0.0)
  455. { octant = 4;
  456. a = -a;
  457. b = -b;
  458. }
  459. else octant = 0;
  460. if (a < 0.0)
  461. { double t = a;
  462. octant += 2;
  463. a = b;
  464. b = -t;
  465. }
  466. if (b > a)
  467. { double t = a;
  468. octant += 1;
  469. a = b;
  470. b = t;
  471. }
  472. { static struct { double h; double l;} _atan[] = {
  473. /*
  474. * The table here gives atan(n/16) in 1.5-precision for n=0..16
  475. * Note that all the magic numbers used in this file were calculated
  476. * using the REDUCE bigfloat package, and all the 'exact' parts have
  477. * a denominator of at worst 2^26 and so are expected to have lots
  478. * of trailing zero bits in their floating point representation.
  479. */
  480. { 0.0, 0.0 },
  481. { 0.06241881847381591796875, -0.84778585694947708870e-8 },
  482. { 0.124355018138885498046875, -2.35921240630155201508e-8 },
  483. { 0.185347974300384521484375, -2.43046897565983490387e-8 },
  484. { 0.2449786663055419921875, -0.31786778380154175187e-8 },
  485. { 0.302884876728057861328125, -0.83530864557675689054e-8 },
  486. { 0.358770668506622314453125, 0.17639499059427950639e-8 },
  487. { 0.412410438060760498046875, 0.35366268088529162896e-8 },
  488. { 0.4636476039886474609375, 0.50121586552767562314e-8 },
  489. { 0.512389481067657470703125, -2.07569197640365239794e-8 },
  490. { 0.558599293231964111328125, 2.21115983246433832164e-8 },
  491. { 0.602287352085113525390625, -0.59501493437085023057e-8 },
  492. { 0.643501102924346923828125, 0.58689374629746842287e-8 },
  493. { 0.6823165416717529296875, 1.32029951485689299817e-8 },
  494. { 0.71882998943328857421875, 1.01883359311982641515e-8 },
  495. { 0.75315129756927490234375, -1.66070805128190106297e-8 },
  496. { 0.785398185253143310546875, -2.18556950009312141541e-8 }
  497. };
  498. int k = (int)(16.0*(b/a + 0.03125)); /* 0 to 16 */
  499. double kd = (double)k/16.0;
  500. double ah = a, al = 0.0,
  501. bh = b, bl = 0.0,
  502. ch, cl, q, q2;
  503. _fp_normalize(ah, al);
  504. _fp_normalize(bh, bl);
  505. ch = bh - ah*kd; cl = bl - al*kd;
  506. ah = ah + bh*kd; al = al + bl*kd;
  507. bh = ch; bl = cl;
  508. /* Now |(a/b)| <= 1/32 */
  509. ah = fp_add(ah, al, &al); /* Re-normalise */
  510. bh = fp_add(bh, bl, &bl);
  511. /* Compute approximation to b/a */
  512. rh = (bh + bl)/(ah + al); rl = 0.0;
  513. _fp_normalize(rh, rl);
  514. bh -= ah*rh; bl -= al*rh;
  515. rl = (bh + bl)/(ah + al); /* Quotient now formed */
  516. /*
  517. * Now it is necessary to compute atan(q) to one-and-a-half precision.
  518. * Since |q| < 1/32 I will leave just q as the high order word of
  519. * the result and compute atan(q)-q as a single precision value. This
  520. * gives about 16 bits accuracy beyond regular single precision work.
  521. */
  522. q = rh + rl;
  523. q2 = q*q;
  524. /* The expansion the follows could be done better using a minimax poly */
  525. rl -= q*q2*(0.33333333333333333333 -
  526. q2*(0.20000000000000000000 -
  527. q2*(0.14285714285714285714 -
  528. q2*(0.11111111111111111111 -
  529. q2* 0.09090909090909090909))));
  530. /* OK - now (rh, rl) is atan(reduced b/a). Need to add on atan(kd) */
  531. rh += _atan[k].h;
  532. rl += _atan[k].l;
  533. }
  534. /*
  535. * The following constants give high precision versions of pi and pi/2,
  536. * and the high partwords (p2h and pih) have lots of low order zero bits
  537. * in their binary representation. Expect (=require) that the arithmetic
  538. * that computes thh is done without introduced rounding error.
  539. */
  540. #define _p2h 1.57079632580280303955078125
  541. #define _p2l 9.92093579680540441639751e-10
  542. #define _pih 3.14159265160560607910156250
  543. #define _pil 1.984187159361080883279502e-9
  544. switch (octant)
  545. {
  546. default:
  547. case 0: thh = rh; thl = rl; break;
  548. case 1: thh = _p2h - rh; thl = _p2l - rl; break;
  549. case 2: thh = _p2h + rh; thl = _p2l + rl; break;
  550. case 3: thh = _pih - rh; thl = _pil - rl; break;
  551. case 4: thh = -_pih + rh; thl = -_pil + rl; break;
  552. case 5: thh = -_p2h - rh; thl = -_p2l - rl; break;
  553. case 6: thh = -_p2h + rh; thl = -_p2l + rl; break;
  554. case 7: thh = -rh; thl = -rl; break;
  555. }
  556. #undef _p2h
  557. #undef _p2l
  558. #undef _pih
  559. #undef _pil
  560. *thetah = fp_add(thh, thl, thetal);
  561. }
  562. static void extended_log(int k, double a, double b,
  563. double *logrh, double *logrl)
  564. {
  565. /*
  566. * If we had exact arithmetic this procedure could be:
  567. * k*log(2) + 0.5*log(a^2 + b^2)
  568. */
  569. double al = 0.0, bl = 0.0, all = 0.0, bll = 0.0, c, ch, cl, cll;
  570. double w, q, qh, ql, rh, rl;
  571. int n;
  572. /*
  573. * First (a^2 + b^2) is calculated, using extra precision.
  574. * Because any rounding at this stage can lead to bad errors in
  575. * the power that I eventually want to compute, I use 3-word
  576. * arithmetic here, and with the version of _fp_normalize given
  577. * above and IEEE or IBM370 arithmetic this part of the
  578. * computation is exact.
  579. */
  580. _fp_normalize(a, al); _fp_normalize(al, all);
  581. _fp_normalize(b, bl); _fp_normalize(bl, bll);
  582. ch = a*a + b*b;
  583. cl = 2.0*(a*al + b*bl);
  584. cll = (al*al + bl*bl) +
  585. all*(2.0*(a + al) + all) +
  586. bll*(2.0*(b + bl) + bll);
  587. _fp_normalize(ch, cl);
  588. _fp_normalize(cl, cll);
  589. c = ch + (cl + cll); /* single precision approximation */
  590. /*
  591. * At this stage the scaling of the input value will mean that we
  592. * have 0.25 <= c <= 2.0
  593. *
  594. * Now rewrite things as
  595. * (2*k + n)*log(s) + 0.5*log((a^2 + b^2)/2^n))
  596. * where s = sqrt(2)
  597. * and where the arg to the log is in sqrt(0.5), sqrt(2)
  598. */
  599. #define _sqrt_half 0.70710678118654752440
  600. #define _sqrt_two 1.41421356237309504880
  601. k = 2*k;
  602. while (c < _sqrt_half)
  603. { k -= 1;
  604. ch *= 2.0;
  605. cl *= 2.0;
  606. cll *= 2.0;
  607. c *= 2.0;
  608. }
  609. while (c > _sqrt_two)
  610. { k += 1;
  611. ch *= 0.5;
  612. cl *= 0.5;
  613. cll *= 0.5;
  614. c *= 0.5;
  615. }
  616. #undef _sqrt_half
  617. #undef _sqrt_two
  618. n = (int)(16.0/c + 0.5);
  619. w = (double)n / 16.0;
  620. ch *= w;
  621. cl *= w;
  622. cll *= w; /* Now |c-1| < 0.04317 */
  623. ch = (ch - 0.5) - 0.5;
  624. ch = fp_add(ch, cl, &cl);
  625. cl = cl + cll;
  626. /*
  627. * (ch, cl) is now the reduced argument ready for calculating log(1+c),
  628. * and now that the reduction is over I can drop back to the use of just
  629. * two doubleprecision values to represent c.
  630. */
  631. c = ch + cl;
  632. qh = c / (2.0 + c);
  633. ql = 0.0;
  634. _fp_normalize(qh, ql);
  635. ql = ((ch - qh*(2.0 + ch)) + cl - qh*cl) / (2.0 + c);
  636. /* (qh, ql) is now c/(2.0 + c) */
  637. rh = qh; /* 18 bits bigger than low part will end up */
  638. q = qh + ql;
  639. w = q*q;
  640. rl = ql + q*w*(0.33333333333333333333 +
  641. w*(0.20000000000000000000 +
  642. w*(0.14285714285714285714 +
  643. w*(0.11111111111111111111 +
  644. w*(0.09090909090909090909)))));
  645. /*
  646. * (rh, rl) is now atan(c) correct to double precision plus about 18 bits.
  647. */
  648. { double temp;
  649. static struct { double h; double l; } _log_table[] = {
  650. /*
  651. * The following values are (in extra precision) -log(n/16)/2 for n
  652. * in the range 11 to 23 (i.e. roughly over sqrt(2)/2 to sqrt(2))
  653. */
  654. { 0.1873466968536376953125, 2.786706765149099245e-8 },
  655. { 0.1438410282135009765625, 0.801238948715710950e-8 },
  656. { 0.103819668292999267578125, 1.409612298322959552e-8 },
  657. { 0.066765725612640380859375, -2.930037906928620319e-8 },
  658. { 0.0322692394256591796875, 2.114312640614896196e-8 },
  659. { 0.0, 0.0 },
  660. { -0.0303122997283935546875, -1.117982386660280307e-8 },
  661. { -0.0588915348052978515625, 1.697710612429310295e-8 },
  662. { -0.08592510223388671875, -2.622944289242004947e-8 },
  663. { -0.111571788787841796875, 1.313073691899185245e-8 },
  664. { -0.135966837406158447265625, -2.033566243215020975e-8 },
  665. { -0.159226894378662109375, 2.881939480146987639e-8 },
  666. { -0.18145275115966796875, 0.431498374218108783e-8 }};
  667. rh = fp_add(rh, _log_table[n-11].h, &temp);
  668. rl = temp + _log_table[n-11].l + rl;
  669. }
  670. #define _exact_part_logroot2 0.3465735912322998046875
  671. #define _approx_part_logroot2 (-9.5232714997888393927e-10)
  672. /* Multiply this by k and add it in */
  673. { double temp, kd = (double)k;
  674. rh = fp_add(rh, kd*_exact_part_logroot2, &temp);
  675. rl = rl + temp + kd*_approx_part_logroot2;
  676. }
  677. #undef _exact_part_logroot2
  678. #undef _approx_part_logroot2
  679. *logrh = rh;
  680. *logrl = rl;
  681. return;
  682. }
  683. Complex MS_CDECL Cpow(Complex z1, Complex z2)
  684. {
  685. double a = z1.real, b = z1.imag,
  686. c = z2.real, d = z2.imag;
  687. int k, m, n;
  688. double scale;
  689. double logrh, logrl, thetah, thetal;
  690. double r, i, rh, rl, ih, il, clow, dlow, q;
  691. double cw, sw, cost, sint;
  692. if (b == 0.0 && d == 0.0 && a >= 0.0)/* Simple case if both args are real */
  693. { z1.real = pow(a, c);
  694. z1.imag = 0.0;
  695. return z1;
  696. }
  697. /*
  698. * Start by scaling z1 by dividing out a power of 2 so that |z1| is
  699. * fairly close to 1
  700. */
  701. if (a == 0.0)
  702. { if (b == 0.0) return z1; /* 0.0**anything is really an error */
  703. /* The exact values returned by frexp do not matter here */
  704. (void)frexp(b, &k);
  705. }
  706. else
  707. { (void)frexp(a, &k);
  708. if (b != 0.0)
  709. { int n;
  710. (void)frexp(b, &n);
  711. if (n > k) k = n;
  712. }
  713. }
  714. scale = ldexp(1.0, k);
  715. a /= scale;
  716. b /= scale;
  717. /*
  718. * The idea next is to express z1 as r*exp(i theta), then z1**z2
  719. * is exp(z2*log(z1)) and the exponent simplifies to
  720. * (c*log r - d*theta) + i(c theta + d log r).
  721. * Note r = scale*sqrt(a*a + b*b) now.
  722. * The argument for exp() must be computed to noticably more than
  723. * regular precision, since otherwise exp() greatly magnifies the
  724. * error in the real part (if it is large and positive) and range
  725. * reduction in the imaginary part can equally lead to accuracy
  726. * problems. Neither c nor d can usefully be anywhere near the
  727. * extreme range of floating point values, so I will not even try
  728. * to scale them, thus I will end up happy(ish) if I can compute
  729. * atan2(b, a) and log(|z1|) in one-and-a-half precision form.
  730. * It would be best to compute theta in units of pi/4 rather than in
  731. * raidians, since then the case of z^n (integer n) with arg(z) an
  732. * exact multiple of pi/4 would work out better. However at present I
  733. * just hope that the 1.5-precision working is good enough.
  734. */
  735. extended_atan2(b, a, &thetah, &thetal);
  736. extended_log(k, a, b, &logrh, &logrl);
  737. /* Normalise all numbers */
  738. clow = 0.0;
  739. dlow = 0.0;
  740. _fp_normalize(c, clow);
  741. _fp_normalize(d, dlow);
  742. /* (rh, rl) = c*logr - d*theta; */
  743. rh = c*logrh - d*thetah; /* No rounding in this computation */
  744. rl = c*logrl + clow*(logrh + logrl) - d*thetal - dlow*(thetah + thetal);
  745. /* (ih, il) = c*theta + d*logr; */
  746. ih = c*thetah + d*logrh; /* No rounding in this computation */
  747. il = c*thetal + clow*(thetah + thetal) + d*logrl + dlow*(logrh + logrl);
  748. /*
  749. * Now it remains to take the exponential of the extended precision
  750. * value in ((rh, rl), (ih, il)).
  751. * Range reduce the real part by multiples of log(2), and the imaginary
  752. * part by multiples of pi/2.
  753. */
  754. rh = fp_add(rh, rl, &rl); /* renormalise */
  755. r = rh + rl; /* Approximate value */
  756. #define _recip_log_2 1.4426950408889634074
  757. q = r * _recip_log_2;
  758. m = (q < 0.0) ? (int)(q - 0.5) : (int)(q + 0.5);
  759. q = (double)m;
  760. #undef _recip_log_2
  761. /*
  762. * log 2 = 11629080/2^24 - 0.000 00000 19046 54299 95776 78785
  763. * to reasonable accuracy. It is vital that the exact part be
  764. * read in as a number with lots of low zero bits, so when it gets
  765. * multiplied by the integer q there is NO rounding needed.
  766. */
  767. #define _exact_part_log2 0.693147182464599609375
  768. #define _approx_part_log2 (-1.9046542999577678785418e-9)
  769. rh = rh - q*_exact_part_log2;
  770. rl = rl - q*_approx_part_log2;
  771. #undef _exact_part_log2
  772. #undef _approx_part_log2
  773. r = exp(rh + rl); /* This should now be accurate enough */
  774. ih = fp_add(ih, il, &il);
  775. i = ih + il; /* Approximate value */
  776. #define _recip_pi_by_2 0.6366197723675813431
  777. q = i * _recip_pi_by_2;
  778. n = (q < 0.0) ? (int)(q - 0.5) : (int)(q + 0.5);
  779. q = (double)n;
  780. /*
  781. * pi/2 = 105414357/2^26 + 0.000 00000 09920 93579 68054 04416 39751
  782. * to reasonable accuracy. It is vital that the exact part be
  783. * read in as a number with lots of low zero bits, so when it gets
  784. * multiplied by the integer q there is NO rounding needed.
  785. */
  786. #define _exact_part_pi2 1.57079632580280303955078125
  787. #define _approx_part_pi2 9.92093579680540441639751e-10
  788. ih = ih - q*_exact_part_pi2;
  789. il = il - q*_approx_part_pi2;
  790. #undef _recip_pi_by_2
  791. #undef _exact_part_pi2
  792. #undef _approx_part_pi2
  793. i = ih + il; /* Now accurate enough */
  794. /*
  795. * Having done super-careful range reduction I can call the regular
  796. * sin/cos routines here. If sin/cos could both be computed together
  797. * that could speed things up a little bit, but by this stage they have
  798. * not much in common.
  799. */
  800. cw = cos(i);
  801. sw = sin(i);
  802. switch (n & 3) /* quadrant control */
  803. {
  804. default:
  805. case 0: cost = cw; sint = sw; break;
  806. case 1: cost = -sw; sint = cw; break;
  807. case 2: cost = -cw; sint = -sw; break;
  808. case 3: cost = sw; sint = -cw; break;
  809. }
  810. /*
  811. * Now, at long last, I can assemble the results and return.
  812. */
  813. z1.real = ldexp(r*cost, m);
  814. z1.imag = ldexp(r*sint, m);
  815. return z1;
  816. }
  817. /*
  818. * End of complex-to-complex-power code.
  819. */
  820. #endif
  821. /* end of arith07.c */