arith07.c 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900
  1. /* arith07.c Copyright (C) 1990-2002 Codemist Ltd */
  2. /*
  3. * Arithmetic functions. negation plus a load of Common Lisp things
  4. * for support of complex numbers.
  5. *
  6. */
  7. /*
  8. * This code may be used and modified, and redistributed in binary
  9. * or source form, subject to the "CCL Public License", which should
  10. * accompany it. This license is a variant on the BSD license, and thus
  11. * permits use of code derived from this in either open and commercial
  12. * projects: but it does require that updates to this code be made
  13. * available back to the originators of the package.
  14. * Before merging other code in with this or linking this code
  15. * with other packages or libraries please check that the license terms
  16. * of the other material are compatible with those of this.
  17. */
  18. /* Signature: 0a772466 08-Apr-2002 */
  19. #include <stdarg.h>
  20. #include <string.h>
  21. #include <ctype.h>
  22. #include <math.h>
  23. #include "machine.h"
  24. #include "tags.h"
  25. #include "cslerror.h"
  26. #include "externs.h"
  27. #include "arith.h"
  28. #ifdef TIMEOUT
  29. #include "timeout.h"
  30. #endif
  31. Lisp_Object copyb(Lisp_Object a)
  32. /*
  33. * copy a bignum.
  34. */
  35. {
  36. Lisp_Object b, nil;
  37. int32 len = bignum_length(a), i;
  38. push(a);
  39. b = getvector(TAG_NUMBERS, TYPE_BIGNUM, len);
  40. pop(a);
  41. errexit();
  42. len = (len-CELL)/4;
  43. for (i=0; i<len; i++)
  44. bignum_digits(b)[i] = bignum_digits(a)[i];
  45. return b;
  46. }
  47. Lisp_Object negateb(Lisp_Object a)
  48. /*
  49. * Negate a bignum. Note that negating the 1-word bignum
  50. * value of 0x08000000 will produce a fixnum as a result,
  51. * which might confuse the caller... in a similar way negating
  52. * the value -0x40000000 will need to promote from a one-word
  53. * bignum to a 2-word bignum. How messy just for negation!
  54. */
  55. {
  56. Lisp_Object b, nil;
  57. int32 len = bignum_length(a), i, carry;
  58. if (len == CELL+4) /* one-word bignum - do specially */
  59. { len = -(int32)bignum_digits(a)[0];
  60. if (len == fix_mask) return fixnum_of_int(len);
  61. else if (len == 0x40000000) return make_two_word_bignum(0, len);
  62. else return make_one_word_bignum(len);
  63. }
  64. push(a);
  65. b = getvector(TAG_NUMBERS, TYPE_BIGNUM, len);
  66. pop(a);
  67. errexit();
  68. len = (len-CELL)/4-1;
  69. carry = -1;
  70. for (i=0; i<len; i++)
  71. { carry = clear_top_bit(~bignum_digits(a)[i]) + top_bit(carry);
  72. bignum_digits(b)[i] = clear_top_bit(carry);
  73. }
  74. /*
  75. * Handle the top digit separately since it is signed.
  76. */
  77. carry = ~bignum_digits(a)[i] + top_bit(carry);
  78. if (!signed_overflow(carry))
  79. {
  80. /*
  81. * If the most significant word ends up as -1 then I just might
  82. * have 0x40000000 in the next word down and so I may need to shrink
  83. * the number. Since I handled 1-word bignums specially I have at
  84. * least two words to deal with here.
  85. */
  86. if (carry == -1 && (bignum_digits(b)[i-1] & 0x40000000) != 0)
  87. { bignum_digits(b)[i-1] |= ~0x7fffffff;
  88. numhdr(b) -= pack_hdrlength(1);
  89. #ifdef ADDRESS_64
  90. if ((i & 1) != 0) bignum_digits(b)[i] = 0;
  91. else bignum_digits(b)[i] = make_bighdr(2);
  92. #else
  93. if ((i & 1) == 0) bignum_digits(b)[i] = 0;
  94. else bignum_digits(b)[i] = make_bighdr(2);
  95. #endif
  96. }
  97. else bignum_digits(b)[i] = carry; /* no shrinking needed */
  98. return b;
  99. }
  100. /*
  101. * Here I have overflow: this can only happen when I negate a number
  102. * that started off with 0xc0000000 in the most significant digit,
  103. * and I have to pad a zero word onto the front.
  104. */
  105. bignum_digits(b)[i] = clear_top_bit(carry);
  106. return lengthen_by_one_bit(b, carry);
  107. }
  108. /*
  109. * generic negation
  110. */
  111. Lisp_Object negate(Lisp_Object a)
  112. {
  113. #ifdef COMMON
  114. Lisp_Object nil; /* needed for errexit() */
  115. #endif
  116. switch ((int)a & TAG_BITS)
  117. {
  118. case TAG_FIXNUM:
  119. { int32 aa = -int_of_fixnum(a);
  120. /*
  121. * negating the number -#x8000000 (which is a fixnum) yields a value
  122. * which just fails to be a fixnum.
  123. */
  124. if (aa != 0x08000000) return fixnum_of_int(aa);
  125. else return make_one_word_bignum(aa);
  126. }
  127. #ifdef COMMON
  128. case TAG_SFLOAT:
  129. { Float_union aa;
  130. aa.i = a - TAG_SFLOAT;
  131. aa.f = (float) (-aa.f);
  132. return (aa.i & ~(int32)0xf) + TAG_SFLOAT;
  133. }
  134. #endif
  135. case TAG_NUMBERS:
  136. { int32 ha = type_of_header(numhdr(a));
  137. switch (ha)
  138. {
  139. case TYPE_BIGNUM:
  140. return negateb(a);
  141. #ifdef COMMON
  142. case TYPE_RATNUM:
  143. { Lisp_Object n = numerator(a),
  144. d = denominator(a);
  145. push(d);
  146. n = negate(n);
  147. pop(d);
  148. errexit();
  149. return make_ratio(n, d);
  150. }
  151. case TYPE_COMPLEX_NUM:
  152. { Lisp_Object r = real_part(a),
  153. i = imag_part(a);
  154. push(i);
  155. r = negate(r);
  156. pop(i);
  157. errexit();
  158. push(r);
  159. i = negate(i);
  160. pop(r);
  161. errexit();
  162. return make_complex(r, i);
  163. }
  164. #endif
  165. default:
  166. return aerror1("bad arg for minus", a);
  167. }
  168. }
  169. case TAG_BOXFLOAT:
  170. { double d = float_of_number(a);
  171. return make_boxfloat(-d, type_of_header(flthdr(a)));
  172. }
  173. default:
  174. return aerror1("bad arg for minus", a);
  175. }
  176. }
  177. /*****************************************************************************/
  178. /*** Transcendental functions etcetera. ***/
  179. /*****************************************************************************/
  180. /*
  181. * Much of the code here is extracted from the portable Fortran library
  182. * used by Codemist with its Fortran compiler.
  183. */
  184. /*
  185. * The object of the following macro is to adjust the floating point
  186. * variables concerned so that the more significant one can be squared
  187. * with NO LOSS OF PRECISION. It is only used when there is no danger
  188. * of over- or under-flow.
  189. *
  190. * This code is NOT PORTABLE but can be modified for use elsewhere
  191. * It should, however, serve for IEEE and IBM FP formats.
  192. */
  193. #define _fp_normalize(high, low) \
  194. { double temp; /* access to representation */ \
  195. temp = high; /* take original number */ \
  196. ((int32 *)&temp)[current_fp_rep & FP_WORD_ORDER] = 0; \
  197. /* make low part of mantissa 0 */ \
  198. low += (high - temp); /* add into low-order result */ \
  199. high = temp; \
  200. }
  201. #ifdef COMMON
  202. double MS_CDECL Cabs(Complex z)
  203. {
  204. /*
  205. * Obtain the absolute value of a complex number - note that the main
  206. * agony here is in ensuring that neither overflow nor underflow can
  207. * wreck the calculation. Given ideal arithmetic the sum could be carried
  208. * through as just sqrt(x^2 + y^2).
  209. */
  210. double x = z.real, y = z.imag;
  211. double scale;
  212. int n1, n2;
  213. if (x==0.0) return fabs(y);
  214. else if (y==0.0) return fabs(x);
  215. (void)frexp(x, &n1);
  216. (void)frexp(y, &n2);
  217. /* The exact range of values returned by frexp does not matter here */
  218. if (n2>n1) n1 = n2;
  219. /* n1 is now the exponent of the larger (in absolute value) of x, y */
  220. scale = ldexp(1.0, n1); /* can not be 0.0 */
  221. x /= scale;
  222. y /= scale;
  223. /* The above scaling operation introduces no rounding error (since the */
  224. /* scale factor is exactly a power of 2). It reduces the larger of x, y */
  225. /* to be somewhere near 1.0 so overflow in x*x+y*y is impossible. It is */
  226. /* still possible that one of x*x and y*y will underflow (but not both) */
  227. /* but this is harmless. */
  228. return scale * sqrt(x*x + y*y);
  229. }
  230. Complex MS_CDECL Ccos(Complex z)
  231. {
  232. double x = z.real, y = z.imag;
  233. /*
  234. * cos(x + iy) = cos(x)*cosh(y) - i sin(x)*sinh(y)
  235. * For smallish y this can be used directly. For |y| > 50 I will
  236. * compute sinh and cosh as just +/- exp(|y|)/2
  237. */
  238. double s = sin(x), c = cos(x);
  239. double absy = fabs(y);
  240. if (absy <= 50.0)
  241. { double sh = sinh(y), ch = cosh(y);
  242. z.real = c*ch;
  243. z.imag = - s*sh;
  244. return z;
  245. }
  246. else
  247. {
  248. double w;
  249. int n = _reduced_exp(absy, &w) - 1;
  250. z.real = ldexp(c*w, n);
  251. if (y < 0.0) z.imag = ldexp(s*w, n);
  252. else z.imag = ldexp(-s*w, n);
  253. return z;
  254. }
  255. }
  256. static double reduced_power(double a, int n)
  257. {
  258. /*
  259. * Compute (1 + a)^n - 1 avoiding undue roundoff error.
  260. * Assumes n >= 1 on entry and that a is small.
  261. */
  262. if (n == 1) return a;
  263. { double d = reduced_power(a, n/2);
  264. d = (2.0 + d)*d;
  265. if (n & 1) d += (1.0 + d)*a;
  266. return d;
  267. }
  268. }
  269. /*
  270. * The following values are included for documentation purposes - they
  271. * give the largest args that can be given to exp() without leading to
  272. * overflow on IBM mainframe and IEEE machines.
  273. * #ifdef IBMFLOAT
  274. * #define _exp_arg_limit 174.673089501106208
  275. * #else
  276. * #define _exp_arg_limit 709.78271289338397
  277. * #endif
  278. * Note that in any case exp(50.0) will not overflow (it is only 5.2e21),
  279. * so it can be evaluated the simple direct way.
  280. */
  281. int _reduced_exp(double x, double *r)
  282. {
  283. /*
  284. * (*r) = exp(x)/2^n; return n;
  285. * where n will be selected so that *r gets set to a fairly small value
  286. * (precise range of r unimportant provided it will be WELL away from
  287. * chances of overflow, even when exp(x) would actually overflow). This
  288. * function may only be called with argument x that is positive and
  289. * large enough that n will end up satisfying n>=1. The coding here
  290. * will ensure that if x>4.0, and in general the use of this function
  291. * will only be for x > 50.
  292. * For IBM hardware it would be good to be able to control the value
  293. * of n mod 4, maybe, to help counter wobbling precision. This is not
  294. * done here.
  295. */
  296. int n;
  297. double f;
  298. n = (int)(x / 7.625 + 0.5);
  299. /*
  300. * 7.625 = 61/8 and is expected to have an exact floating point
  301. * representation here, so f is computed without any rounding error.
  302. * (do I need something like the (x - 0.5) - 0.5 trick here?)
  303. */
  304. f = exp(x - 7.625*(double)n);
  305. /*
  306. * the magic constant is ((exp(61/8) / 2048) - 1) and it arises because
  307. * 61/88 is a decent rational approximation to log(2), hence exp(61/8)
  308. * is almost 2^11. Thus I compute exp(x) as
  309. * 2^(11*n) * (exp(61/8)/2^11)^n * exp(f)
  310. * The first factor is exact, the second is (1+e)^n where e is small and
  311. * n is an integer, so can be computer accurately, and the residue f at the
  312. * end is small enough not to give over-bad trouble.
  313. * The numeric constant given here was calculated with the REDUCE 3.3
  314. * bigfloat package.
  315. */
  316. #define _e61q8 3.81086435594567676751e-4
  317. *r = reduced_power(_e61q8, n)*f + f;
  318. #undef _e61q8
  319. return 11*n;
  320. }
  321. Complex MS_CDECL Cexp(Complex z)
  322. {
  323. double x = z.real, y = z.imag;
  324. /*
  325. * value is exp(x)*(cos(y) + i sin(y)) but have care with overflow
  326. * Here (and throughout the complex library) there is an opportunity
  327. * to save time by computing sin(y) and cos(y) together. Since this
  328. * code is (to begin with) to sit on top of an arbitrary C library,
  329. * perhaps with hardware support for the calculation of real-valued
  330. * trig functions I am not going to try to realise this saving.
  331. */
  332. double s = sin(y), c = cos(y);
  333. /*
  334. * if x > 50 I will use a cautious sceme which computes exp(x) with
  335. * its (binary) exponent separated. Note that 50.0 is chosen as a
  336. * number noticably smaller than _exp_arg_limit (exp(50) = 5.18e21),
  337. * but is not a critical very special number.
  338. */
  339. if (x <= 50.0) /* includes x < 0.0, of course */
  340. { double w = exp(x);
  341. z.real = w*c;
  342. z.imag = w*s;
  343. return z;
  344. }
  345. else
  346. { double w;
  347. int n = _reduced_exp(x, &w);
  348. z.real = ldexp(w*c, n);
  349. z.imag = ldexp(w*s, n);
  350. return z;
  351. }
  352. }
  353. Complex MS_CDECL Cln(Complex z)
  354. {
  355. double x = z.real, y = z.imag;
  356. /*
  357. * if x and y are both very large then cabs(z) may be out of range
  358. * even though log or if is OK. Thus it is necessary to perform an
  359. * elaborate scaled calculation here, and not just
  360. * z.real = log(cabs(z));
  361. */
  362. double scale, r;
  363. int n1, n2;
  364. if (x==0.0) r = log(fabs(y));
  365. else if (y==0.0) r = log(fabs(x));
  366. else
  367. {
  368. (void)frexp(x, &n1);
  369. (void)frexp(y, &n2);
  370. /* The exact range of values returned by frexp does not matter here */
  371. if (n2>n1) n1 = n2;
  372. scale = ldexp(1.0, n1);
  373. x /= scale;
  374. y /= scale;
  375. r = log(scale) + 0.5*log(x*x + y*y);
  376. }
  377. z.real = r;
  378. /*
  379. * The C standard is not very explicit about the behaviour of atan2(0.0, -n)
  380. * while for Fortran it is necessary that this returns +pi not -pi. Hence
  381. * with extreme caution I put a special test here.
  382. */
  383. if (y == 0.0)
  384. if (x < 0.0) z.imag = _pi;
  385. else z.imag = 0.0;
  386. else z.imag = atan2(y, x);
  387. return z;
  388. }
  389. /*
  390. * Complex raising to a power. This seems to be pretty nasty
  391. * to get right, and the code includes extra high precision variants
  392. * on atan() and log(). Further refinements wrt efficiency may be
  393. * possible later on.
  394. * This code has been partially tested, and seems to be uniformly
  395. * better than using just a**b = exp(b*log(a)), but much more careful
  396. * study is needed before it can possibly be claimed that it is
  397. * right in the sense of not throwing away accuracy when it does not
  398. * have to. I also need to make careful checks to verify that the
  399. * correct (principal) value is computed.
  400. */
  401. /*
  402. * The next function is used after arithmetic has been done on extra-
  403. * precision numbers so that the relationship between high and low parts
  404. * is no longer known. Re-instate it.
  405. */
  406. #define _two_minus_25 2.98023223876953125e-8 /* 2^(-25) */
  407. static double fp_add(double a, double b, double *lowres)
  408. {
  409. /* Result is the high part of a+b, with the low part assigned to *lowres */
  410. double absa, absb;
  411. if (a >= 0.0) absa = a; else absa = -a;
  412. if (b >= 0.0) absb = b; else absb = -b;
  413. if (absa < absb)
  414. { double t = a; a = b; b = t;
  415. }
  416. /* Now a is the larger (in absolute value) of the two numbers */
  417. if (absb > absa * _two_minus_25)
  418. { double al = 0.0, bl = 0.0;
  419. /*
  420. * If the exponent difference beweeen a and b is no more than 25 then
  421. * I can add the top part (20 or 24 bits) of a to the top part of b
  422. * without going beyond the 52 or 56 bits that a full mantissa can hold.
  423. */
  424. _fp_normalize(a, al);
  425. _fp_normalize(b, bl);
  426. a = a + b; /* No rounding needed here */
  427. b = al + bl;
  428. if (a == 0.0)
  429. { a = b;
  430. b = 0.0;
  431. }
  432. }
  433. /*
  434. * The above step leaves b small wrt the value in a (unless a+b led
  435. * to substantial cancellation of leading digits), but leaves the high
  436. * part a with bits everywhere. Force low part of a to zero.
  437. */
  438. { double al = 0.0;
  439. _fp_normalize(a, al);
  440. b = b + al;
  441. }
  442. if (a >= 0.0) absa = a; else absa = -a;
  443. if (b >= 0.0) absb = b; else absb = -b;
  444. if (absb > absa * _two_minus_25)
  445. /*
  446. * If on input a is close to -b, then a+b is close to zero. In this
  447. * case the exponents of a abd b matched, and so earlier calculations
  448. * have all been done exactly. Go around again to split residue into
  449. * high and low parts.
  450. */
  451. { double al = 0.0, bl = 0.0;
  452. _fp_normalize(b, bl);
  453. a = a + b;
  454. _fp_normalize(a, al);
  455. b = bl + al;
  456. }
  457. *lowres = b;
  458. return a;
  459. }
  460. #undef _two_minus_25
  461. static void extended_atan2(double b, double a, double *thetah, double *thetal)
  462. {
  463. int octant;
  464. double rh, rl, thh, thl;
  465. /*
  466. * First reduce the argument to the first octant (i.e. a, b both +ve,
  467. * and b <= a).
  468. */
  469. if (b < 0.0)
  470. { octant = 4;
  471. a = -a;
  472. b = -b;
  473. }
  474. else octant = 0;
  475. if (a < 0.0)
  476. { double t = a;
  477. octant += 2;
  478. a = b;
  479. b = -t;
  480. }
  481. if (b > a)
  482. { double t = a;
  483. octant += 1;
  484. a = b;
  485. b = t;
  486. }
  487. { static struct { double h; double l;} _atan[] = {
  488. /*
  489. * The table here gives atan(n/16) in 1.5-precision for n=0..16
  490. * Note that all the magic numbers used in this file were calculated
  491. * using the REDUCE bigfloat package, and all the 'exact' parts have
  492. * a denominator of at worst 2^26 and so are expected to have lots
  493. * of trailing zero bits in their floating point representation.
  494. */
  495. { 0.0, 0.0 },
  496. { 0.06241881847381591796875, -0.84778585694947708870e-8 },
  497. { 0.124355018138885498046875, -2.35921240630155201508e-8 },
  498. { 0.185347974300384521484375, -2.43046897565983490387e-8 },
  499. { 0.2449786663055419921875, -0.31786778380154175187e-8 },
  500. { 0.302884876728057861328125, -0.83530864557675689054e-8 },
  501. { 0.358770668506622314453125, 0.17639499059427950639e-8 },
  502. { 0.412410438060760498046875, 0.35366268088529162896e-8 },
  503. { 0.4636476039886474609375, 0.50121586552767562314e-8 },
  504. { 0.512389481067657470703125, -2.07569197640365239794e-8 },
  505. { 0.558599293231964111328125, 2.21115983246433832164e-8 },
  506. { 0.602287352085113525390625, -0.59501493437085023057e-8 },
  507. { 0.643501102924346923828125, 0.58689374629746842287e-8 },
  508. { 0.6823165416717529296875, 1.32029951485689299817e-8 },
  509. { 0.71882998943328857421875, 1.01883359311982641515e-8 },
  510. { 0.75315129756927490234375, -1.66070805128190106297e-8 },
  511. { 0.785398185253143310546875, -2.18556950009312141541e-8 }
  512. };
  513. int k = (int)(16.0*(b/a + 0.03125)); /* 0 to 16 */
  514. double kd = (double)k/16.0;
  515. double ah = a, al = 0.0,
  516. bh = b, bl = 0.0,
  517. ch, cl, q, q2;
  518. _fp_normalize(ah, al);
  519. _fp_normalize(bh, bl);
  520. ch = bh - ah*kd; cl = bl - al*kd;
  521. ah = ah + bh*kd; al = al + bl*kd;
  522. bh = ch; bl = cl;
  523. /* Now |(a/b)| <= 1/32 */
  524. ah = fp_add(ah, al, &al); /* Re-normalise */
  525. bh = fp_add(bh, bl, &bl);
  526. /* Compute approximation to b/a */
  527. rh = (bh + bl)/(ah + al); rl = 0.0;
  528. _fp_normalize(rh, rl);
  529. bh -= ah*rh; bl -= al*rh;
  530. rl = (bh + bl)/(ah + al); /* Quotient now formed */
  531. /*
  532. * Now it is necessary to compute atan(q) to one-and-a-half precision.
  533. * Since |q| < 1/32 I will leave just q as the high order word of
  534. * the result and compute atan(q)-q as a single precision value. This
  535. * gives about 16 bits accuracy beyond regular single precision work.
  536. */
  537. q = rh + rl;
  538. q2 = q*q;
  539. /* The expansion the follows could be done better using a minimax poly */
  540. rl -= q*q2*(0.33333333333333333333 -
  541. q2*(0.20000000000000000000 -
  542. q2*(0.14285714285714285714 -
  543. q2*(0.11111111111111111111 -
  544. q2* 0.09090909090909090909))));
  545. /* OK - now (rh, rl) is atan(reduced b/a). Need to add on atan(kd) */
  546. rh += _atan[k].h;
  547. rl += _atan[k].l;
  548. }
  549. /*
  550. * The following constants give high precision versions of pi and pi/2,
  551. * and the high partwords (p2h and pih) have lots of low order zero bits
  552. * in their binary representation. Expect (=require) that the arithmetic
  553. * that computes thh is done without introduced rounding error.
  554. */
  555. #define _p2h 1.57079632580280303955078125
  556. #define _p2l 9.92093579680540441639751e-10
  557. #define _pih 3.14159265160560607910156250
  558. #define _pil 1.984187159361080883279502e-9
  559. switch (octant)
  560. {
  561. default:
  562. case 0: thh = rh; thl = rl; break;
  563. case 1: thh = _p2h - rh; thl = _p2l - rl; break;
  564. case 2: thh = _p2h + rh; thl = _p2l + rl; break;
  565. case 3: thh = _pih - rh; thl = _pil - rl; break;
  566. case 4: thh = -_pih + rh; thl = -_pil + rl; break;
  567. case 5: thh = -_p2h - rh; thl = -_p2l - rl; break;
  568. case 6: thh = -_p2h + rh; thl = -_p2l + rl; break;
  569. case 7: thh = -rh; thl = -rl; break;
  570. }
  571. #undef _p2h
  572. #undef _p2l
  573. #undef _pih
  574. #undef _pil
  575. *thetah = fp_add(thh, thl, thetal);
  576. }
  577. static void extended_log(int k, double a, double b,
  578. double *logrh, double *logrl)
  579. {
  580. /*
  581. * If we had exact arithmetic this procedure could be:
  582. * k*log(2) + 0.5*log(a^2 + b^2)
  583. */
  584. double al = 0.0, bl = 0.0, all = 0.0, bll = 0.0, c, ch, cl, cll;
  585. double w, q, qh, ql, rh, rl;
  586. int n;
  587. /*
  588. * First (a^2 + b^2) is calculated, using extra precision.
  589. * Because any rounding at this stage can lead to bad errors in
  590. * the power that I eventually want to compute, I use 3-word
  591. * arithmetic here, and with the version of _fp_normalize given
  592. * above and IEEE or IBM370 arithmetic this part of the
  593. * computation is exact.
  594. */
  595. _fp_normalize(a, al); _fp_normalize(al, all);
  596. _fp_normalize(b, bl); _fp_normalize(bl, bll);
  597. ch = a*a + b*b;
  598. cl = 2.0*(a*al + b*bl);
  599. cll = (al*al + bl*bl) +
  600. all*(2.0*(a + al) + all) +
  601. bll*(2.0*(b + bl) + bll);
  602. _fp_normalize(ch, cl);
  603. _fp_normalize(cl, cll);
  604. c = ch + (cl + cll); /* single precision approximation */
  605. /*
  606. * At this stage the scaling of the input value will mean that we
  607. * have 0.25 <= c <= 2.0
  608. *
  609. * Now rewrite things as
  610. * (2*k + n)*log(s) + 0.5*log((a^2 + b^2)/2^n))
  611. * where s = sqrt(2)
  612. * and where the arg to the log is in sqrt(0.5), sqrt(2)
  613. */
  614. #define _sqrt_half 0.70710678118654752440
  615. #define _sqrt_two 1.41421356237309504880
  616. k = 2*k;
  617. while (c < _sqrt_half)
  618. { k -= 1;
  619. ch *= 2.0;
  620. cl *= 2.0;
  621. cll *= 2.0;
  622. c *= 2.0;
  623. }
  624. while (c > _sqrt_two)
  625. { k += 1;
  626. ch *= 0.5;
  627. cl *= 0.5;
  628. cll *= 0.5;
  629. c *= 0.5;
  630. }
  631. #undef _sqrt_half
  632. #undef _sqrt_two
  633. n = (int)(16.0/c + 0.5);
  634. w = (double)n / 16.0;
  635. ch *= w;
  636. cl *= w;
  637. cll *= w; /* Now |c-1| < 0.04317 */
  638. ch = (ch - 0.5) - 0.5;
  639. ch = fp_add(ch, cl, &cl);
  640. cl = cl + cll;
  641. /*
  642. * (ch, cl) is now the reduced argument ready for calculating log(1+c),
  643. * and now that the reduction is over I can drop back to the use of just
  644. * two doubleprecision values to represent c.
  645. */
  646. c = ch + cl;
  647. qh = c / (2.0 + c);
  648. ql = 0.0;
  649. _fp_normalize(qh, ql);
  650. ql = ((ch - qh*(2.0 + ch)) + cl - qh*cl) / (2.0 + c);
  651. /* (qh, ql) is now c/(2.0 + c) */
  652. rh = qh; /* 18 bits bigger than low part will end up */
  653. q = qh + ql;
  654. w = q*q;
  655. rl = ql + q*w*(0.33333333333333333333 +
  656. w*(0.20000000000000000000 +
  657. w*(0.14285714285714285714 +
  658. w*(0.11111111111111111111 +
  659. w*(0.09090909090909090909)))));
  660. /*
  661. * (rh, rl) is now atan(c) correct to double precision plus about 18 bits.
  662. */
  663. { double temp;
  664. static struct { double h; double l; } _log_table[] = {
  665. /*
  666. * The following values are (in extra precision) -log(n/16)/2 for n
  667. * in the range 11 to 23 (i.e. roughly over sqrt(2)/2 to sqrt(2))
  668. */
  669. { 0.1873466968536376953125, 2.786706765149099245e-8 },
  670. { 0.1438410282135009765625, 0.801238948715710950e-8 },
  671. { 0.103819668292999267578125, 1.409612298322959552e-8 },
  672. { 0.066765725612640380859375, -2.930037906928620319e-8 },
  673. { 0.0322692394256591796875, 2.114312640614896196e-8 },
  674. { 0.0, 0.0 },
  675. { -0.0303122997283935546875, -1.117982386660280307e-8 },
  676. { -0.0588915348052978515625, 1.697710612429310295e-8 },
  677. { -0.08592510223388671875, -2.622944289242004947e-8 },
  678. { -0.111571788787841796875, 1.313073691899185245e-8 },
  679. { -0.135966837406158447265625, -2.033566243215020975e-8 },
  680. { -0.159226894378662109375, 2.881939480146987639e-8 },
  681. { -0.18145275115966796875, 0.431498374218108783e-8 }};
  682. rh = fp_add(rh, _log_table[n-11].h, &temp);
  683. rl = temp + _log_table[n-11].l + rl;
  684. }
  685. #define _exact_part_logroot2 0.3465735912322998046875
  686. #define _approx_part_logroot2 (-9.5232714997888393927e-10)
  687. /* Multiply this by k and add it in */
  688. { double temp, kd = (double)k;
  689. rh = fp_add(rh, kd*_exact_part_logroot2, &temp);
  690. rl = rl + temp + kd*_approx_part_logroot2;
  691. }
  692. #undef _exact_part_logroot2
  693. #undef _approx_part_logroot2
  694. *logrh = rh;
  695. *logrl = rl;
  696. return;
  697. }
  698. Complex MS_CDECL Cpow(Complex z1, Complex z2)
  699. {
  700. double a = z1.real, b = z1.imag,
  701. c = z2.real, d = z2.imag;
  702. int k, m, n;
  703. double scale;
  704. double logrh, logrl, thetah, thetal;
  705. double r, i, rh, rl, ih, il, clow, dlow, q;
  706. double cw, sw, cost, sint;
  707. if (b == 0.0 && d == 0.0 && a >= 0.0)/* Simple case if both args are real */
  708. { z1.real = pow(a, c);
  709. z1.imag = 0.0;
  710. return z1;
  711. }
  712. /*
  713. * Start by scaling z1 by dividing out a power of 2 so that |z1| is
  714. * fairly close to 1
  715. */
  716. if (a == 0.0)
  717. { if (b == 0.0) return z1; /* 0.0**anything is really an error */
  718. /* The exact values returned by frexp do not matter here */
  719. (void)frexp(b, &k);
  720. }
  721. else
  722. { (void)frexp(a, &k);
  723. if (b != 0.0)
  724. { int n;
  725. (void)frexp(b, &n);
  726. if (n > k) k = n;
  727. }
  728. }
  729. scale = ldexp(1.0, k);
  730. a /= scale;
  731. b /= scale;
  732. /*
  733. * The idea next is to express z1 as r*exp(i theta), then z1**z2
  734. * is exp(z2*log(z1)) and the exponent simplifies to
  735. * (c*log r - d*theta) + i(c theta + d log r).
  736. * Note r = scale*sqrt(a*a + b*b) now.
  737. * The argument for exp() must be computed to noticably more than
  738. * regular precision, since otherwise exp() greatly magnifies the
  739. * error in the real part (if it is large and positive) and range
  740. * reduction in the imaginary part can equally lead to accuracy
  741. * problems. Neither c nor d can usefully be anywhere near the
  742. * extreme range of floating point values, so I will not even try
  743. * to scale them, thus I will end up happy(ish) if I can compute
  744. * atan2(b, a) and log(|z1|) in one-and-a-half precision form.
  745. * It would be best to compute theta in units of pi/4 rather than in
  746. * raidians, since then the case of z^n (integer n) with arg(z) an
  747. * exact multiple of pi/4 would work out better. However at present I
  748. * just hope that the 1.5-precision working is good enough.
  749. */
  750. extended_atan2(b, a, &thetah, &thetal);
  751. extended_log(k, a, b, &logrh, &logrl);
  752. /* Normalise all numbers */
  753. clow = 0.0;
  754. dlow = 0.0;
  755. _fp_normalize(c, clow);
  756. _fp_normalize(d, dlow);
  757. /* (rh, rl) = c*logr - d*theta; */
  758. rh = c*logrh - d*thetah; /* No rounding in this computation */
  759. rl = c*logrl + clow*(logrh + logrl) - d*thetal - dlow*(thetah + thetal);
  760. /* (ih, il) = c*theta + d*logr; */
  761. ih = c*thetah + d*logrh; /* No rounding in this computation */
  762. il = c*thetal + clow*(thetah + thetal) + d*logrl + dlow*(logrh + logrl);
  763. /*
  764. * Now it remains to take the exponential of the extended precision
  765. * value in ((rh, rl), (ih, il)).
  766. * Range reduce the real part by multiples of log(2), and the imaginary
  767. * part by multiples of pi/2.
  768. */
  769. rh = fp_add(rh, rl, &rl); /* renormalise */
  770. r = rh + rl; /* Approximate value */
  771. #define _recip_log_2 1.4426950408889634074
  772. q = r * _recip_log_2;
  773. m = (q < 0.0) ? (int)(q - 0.5) : (int)(q + 0.5);
  774. q = (double)m;
  775. #undef _recip_log_2
  776. /*
  777. * log 2 = 11629080/2^24 - 0.000 00000 19046 54299 95776 78785
  778. * to reasonable accuracy. It is vital that the exact part be
  779. * read in as a number with lots of low zero bits, so when it gets
  780. * multiplied by the integer q there is NO rounding needed.
  781. */
  782. #define _exact_part_log2 0.693147182464599609375
  783. #define _approx_part_log2 (-1.9046542999577678785418e-9)
  784. rh = rh - q*_exact_part_log2;
  785. rl = rl - q*_approx_part_log2;
  786. #undef _exact_part_log2
  787. #undef _approx_part_log2
  788. r = exp(rh + rl); /* This should now be accurate enough */
  789. ih = fp_add(ih, il, &il);
  790. i = ih + il; /* Approximate value */
  791. #define _recip_pi_by_2 0.6366197723675813431
  792. q = i * _recip_pi_by_2;
  793. n = (q < 0.0) ? (int)(q - 0.5) : (int)(q + 0.5);
  794. q = (double)n;
  795. /*
  796. * pi/2 = 105414357/2^26 + 0.000 00000 09920 93579 68054 04416 39751
  797. * to reasonable accuracy. It is vital that the exact part be
  798. * read in as a number with lots of low zero bits, so when it gets
  799. * multiplied by the integer q there is NO rounding needed.
  800. */
  801. #define _exact_part_pi2 1.57079632580280303955078125
  802. #define _approx_part_pi2 9.92093579680540441639751e-10
  803. ih = ih - q*_exact_part_pi2;
  804. il = il - q*_approx_part_pi2;
  805. #undef _recip_pi_by_2
  806. #undef _exact_part_pi2
  807. #undef _approx_part_pi2
  808. i = ih + il; /* Now accurate enough */
  809. /*
  810. * Having done super-careful range reduction I can call the regular
  811. * sin/cos routines here. If sin/cos could both be computed together
  812. * that could speed things up a little bit, but by this stage they have
  813. * not much in common.
  814. */
  815. cw = cos(i);
  816. sw = sin(i);
  817. switch (n & 3) /* quadrant control */
  818. {
  819. default:
  820. case 0: cost = cw; sint = sw; break;
  821. case 1: cost = -sw; sint = cw; break;
  822. case 2: cost = -cw; sint = -sw; break;
  823. case 3: cost = sw; sint = -cw; break;
  824. }
  825. /*
  826. * Now, at long last, I can assemble the results and return.
  827. */
  828. z1.real = ldexp(r*cost, m);
  829. z1.imag = ldexp(r*sint, m);
  830. return z1;
  831. }
  832. /*
  833. * End of complex-to-complex-power code.
  834. */
  835. #endif
  836. /* end of arith07.c */