1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285 |
- /* arith02.c Copyright (C) 1990/1991 Codemist Ltd */
- /*
- * Arithmetic functions.
- * Multiplication of generic numbers
- * and in particular a lot of bignum support.
- *
- * Version 1.4 February 1991 - Fast multiplication.
- */
- /* Signature: 4a0039df 31-May-1997 */
- #include <stdarg.h>
- #include <string.h>
- #include <ctype.h>
- #include <math.h>
- #ifdef __WATCOMC__
- #include <float.h>
- #endif
- #include "machine.h"
- #include "tags.h"
- #include "cslerror.h"
- #include "externs.h"
- #include "arith.h"
- #ifdef TIMEOUT
- #include "timeout.h"
- #endif
- /*
- * Now for multiplication
- */
- /*
- * I provide symbols IMULTIPLY and IDIVIDE which can be asserted if the
- * corresponding routines have been provided elsewhere (e.g. in machine
- * code for extra speed)
- */
- #ifndef IMULTIPLY
- #ifdef MULDIV64
- /*
- * If MULDIV64 is asserted this function in not in fact needed
- * since a macro in arith.h arranges that the multiplication is done
- * in-line. However this version is left here in the source code as
- * convenient documentation of what the function needs to do.
- */
-
- unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c)
- /*
- * (result, *rlow) = a*b + c as 62-bit product
- *
- * rlow may well be the same as one of a, b or c so the
- * assignment to *rlow must not be done until the calculation is
- * complete. Inputs a and b are 31-bit values (i.e. they have
- * their top bit zero and are to be treated as positive value.
- * c may use all 32 bits, but again is treated as unsigned.
- * The result is computed and the low 31 bits placed in rlow,
- * (the top bit of rlow will end up zero) and the top part
- * of the result is returned.
- */
- {
- /* NB the value r cound be int64 or unsigned64 - it does not matter */
- unsigned64 r = (unsigned64)a*(unsigned64)b + (unsigned64)c;
- *rlow = (unsigned32)(r & 0x7fffffff);
- return (unsigned32)(r >> 31);
- }
- #else
- #ifdef OLDER_VERSION
- unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c)
- /*
- * (result, *rlow) = a*b + c as 62-bit product
- *
- * The code given here forms the produce by doing four single-precision
- * (16*16->32) multiplies and using shifts etc to glue the partial results
- * together.
- */
- {
- unsigned32 ah = a >> 16, bh = b >> 16, ch = c >> 16;
- unsigned32 w0, w1, w2, w3, w4;
- a -= ah << 16;
- b -= bh << 16;
- c -= ch << 16; /* a, b and c now split into high/low parts */
- w0 = a*b; /* One... */
- w1 = w0 >> 16;
- w0 = w0 - (w1 << 16) + c;
- w1 += ch;
- w2 = a*bh; /* Two... */
- w3 = w2 >> 16;
- w1 += w2 - (w3 << 16);
- w2 = ah*b; /* Three... */
- w4 = w2 >> 16;
- w1 += w2 - (w4 << 16);
- w2 = w0 >> 16;
- w1 += w2;
- w0 -= w2 << 16;
- w2 = ah*bh + w3 + w4; /* Four 16-bit multiplies done in all. */
- w2 += w1 >> 16;
- /* Here I do a minor shift to split the result at bit 31 rather than 32 */
- *rlow = w0 + ((w1 & 0x7fff) << 16);
- return (w2<<1) + ((w1>>15) & 1);
- }
- #else
- unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c)
- /*
- * (result, *rlow) = a*b + c as 62-bit product
- *
- * The code given here forms the produce by doing three single-precision
- * (16*16->32) multiplies and using shifts etc to glue the partial results
- * together. This is slightly faster than the above (maybe simpler?) code
- * on at least some machines.
- */
- {
- unsigned32 ah, bh;
- unsigned32 w0, w1, w2;
- ah = a >> 16;
- /*
- * On some machines I know that multi-bit shifts are especially painful,
- * while on others it is nasty to access the literal value needed as a
- * mask here. Hence I make some show of providing an alternative.
- */
- #ifdef FAST_SHIFTS
- a -= ah << 16;
- #else
- a &= 0xffff;
- #endif
- bh = b >> 16;
- #ifdef FAST_SHIFTS
- b -= bh << 16;
- #else
- b &= 0xffff;
- #endif
- /*
- * At present I can not see any way of issuing any of these multiplies
- * any earlier, or of doing useful work between the times that I launch
- * them, or even delaying before I use their results. This will be rather
- * sad on machines where multiplication could overlap with other operations.
- */
- w2 = ah*bh;
- w1 = (a + ah)*(b + bh);
- w0 = a*b;
- /*
- * The largest exact result that can be computed by the next line (given
- * that a and b start off just 31 bit) is 0xfffd0002
- */
- w1 = (w1 - w2) - w0; /* == (a*bh + b*ah) */
- /*
- * I split into 30 bits in the lower word and 32 in the upper, so I have
- * 2 bits available for temporary carry effects in the lower part.
- */
- w2 = (w2 << 2) + (w1 >> 14) + (w0 >> 30) + (c >> 30);
- w1 &= 0x3fff;
- w0 = (c & 0x3fffffff) + (w0 & 0x3fffffffU) + (w1 << 16);
- w0 += ((w2 & 1) << 30);
- w2 = (w2 >> 1) + (w0 >> 31);
- *rlow = w0 & 0x7fffffffU;
- return w2;
- }
- #endif
- #endif
- #endif /* IMULTIPLY */
- static Lisp_Object timesii(Lisp_Object a, Lisp_Object b)
- /*
- * multiplying two fixnums together is much messier than adding them,
- * mainly because the result can easily be a two-word bignum
- */
- {
- unsigned32 aa = (unsigned32)int_of_fixnum(a),
- bb = (unsigned32)int_of_fixnum(b);
- unsigned32 temp, low, high;
- /*
- * Multiplication by 0 or by -1 is just possibly common enough to be worth
- * filtering out the following special cases. Avoidance of the tedious
- * checks for overflow may make this useful even if Imultiply is very fast.
- */
- if (aa <= 1)
- { if (aa == 0) return fixnum_of_int(0);
- else return b;
- }
- else if (bb <= 1)
- { if (bb == 0) return fixnum_of_int(0);
- else return a;
- }
- /*
- * I dump the low part of the product in temp then copy to a variable low
- * because temp has to have its address taken and so is not a candidate for
- * living in a register.
- */
- Dmultiply(high, temp, clear_top_bit(aa), clear_top_bit(bb), 0);
- /*
- * The result handed back here has only 31 bits active in its low part.
- */
- low = temp;
- /*
- * The next two lines convert the unsigned product produced by Imultiply
- * into a signed product.
- */
- if ((int32)aa < 0) high -= bb;
- if ((int32)bb < 0) high -= aa;
- if ((high & 0x40000000) == 0) high = clear_top_bit(high);
- else high = set_top_bit(high);
- if (high == 0 && (low & 0x40000000) == 0)
- { /* one word positive result */
- if (low <= 0x07ffffff) return fixnum_of_int(low);
- else return make_one_word_bignum(low);
- }
- else if (high == -1 && (low & 0x40000000) != 0)
- { /* one word negative result */
- low = set_top_bit(low);
- if ((low & fix_mask) == fix_mask) return fixnum_of_int(low);
- else return make_one_word_bignum(low);
- }
- else
- { /* two-word bignum result needed */
- Lisp_Object w = getvector(TAG_NUMBERS, TYPE_BIGNUM, 12), nil;
- errexit();
- ((int32 *)((char *)w - TAG_NUMBERS))[1] = low;
- ((int32 *)((char *)w - TAG_NUMBERS))[2] = high;
- ((int32 *)((char *)w - TAG_NUMBERS))[3] = 0; /* padder word */
- return w;
- }
- }
- #ifdef COMMON
- static Lisp_Object timesis(Lisp_Object a, Lisp_Object b)
- {
- Float_union bb;
- bb.i = b - TAG_SFLOAT;
- bb.f = (float) ((float)int_of_fixnum(a) * bb.f);
- return (bb.i & ~(int32)0xf) + TAG_SFLOAT;
- }
- #endif
- static Lisp_Object timesib(Lisp_Object a, Lisp_Object b)
- {
- int32 aa = int_of_fixnum(a), lenb, i;
- unsigned32 carry, ms_dig, w;
- Lisp_Object c, nil;
- /*
- * I will split off the (easy) cases of the fixnum being -1, 0 or 1.
- */
- if (aa == 0) return fixnum_of_int(0);
- else if (aa == 1) return b;
- else if (aa == -1) return negateb(b);
- lenb = bignum_length(b);
- push(b);
- c = getvector(TAG_NUMBERS, TYPE_BIGNUM, lenb);
- pop(b);
- errexit();
- lenb = (lenb >> 2) - 1;
- if (aa < 0)
- { aa = -aa;
- carry = 0xffffffffU;
- for (i=0; i<lenb-1; i++)
- { carry = clear_top_bit(~bignum_digits(b)[i]) + top_bit(carry);
- bignum_digits(c)[i] = clear_top_bit(carry);
- }
- /*
- * I do the most significant digit separately.
- */
- carry = clear_top_bit(~bignum_digits(b)[i]) + top_bit(carry);
- /*
- * there is a special case needed here - if b started off as a number
- * like 0xc0000000,0,0,0 then negating it would call for extension of
- * the bignum c (this is the usual assymetry in the range of twos-
- * complement numbers). But if I detect that case specially I can
- * observe that the ONLY case where negation overflows is where the
- * negated value is exactly a power of 2 such .. an easy thing to
- * multiply a by! Furthermore the power of two involved is known to
- * by 30 mod 31.
- */
- if (carry == 0x40000000)
- { bignum_digits(c)[i] = (aa & 1) << 30;
- aa = aa >> 1;
- goto extend_by_one_word;
- }
- if ((carry & 0x40000000) != 0) carry = set_top_bit(carry);
- bignum_digits(c)[i] = carry;
- }
- else
- { for (i=0; i<lenb; i++) bignum_digits(c)[i] = bignum_digits(b)[i];
- }
- /*
- * Now c is a copy of b (negated if necessary) and I just want to
- * multiply it by the positive value a. This is the heart of the
- * procedure. Re-write Imultiply in assembly code if you want it
- * to go faster. See the top of this file for a portable sample
- * implementation of Imultiply, which gives back a result as a pair
- * of 31-bit values.
- */
- carry = 0;
- /*
- * here aa is > 0, and a fortiori the 0x80000000 bit of aa is clear,
- * so I do not have to worry about the difference between 31 and 32
- * bit values for aa.
- */
- for (i=0; i<lenb-1; i++)
- Dmultiply(carry, bignum_digits(c)[i], bignum_digits(c)[i],
- (unsigned32)aa, carry);
- ms_dig = bignum_digits(c)[i];
- Dmultiply(carry, w, clear_top_bit(ms_dig), (unsigned32)aa, carry);
- /*
- * After forming the product to (lenb) digits I need to see if there
- * is any overflow. Calculate what the next digit would be, sign
- * extending into the 0x80000000 bit as necessary.
- */
- if ((carry & 0x40000000) != 0) carry = set_top_bit(carry);
- if (((int32)ms_dig) < 0) carry -= aa;
- aa = (int32)carry;
- if (aa == -1 && (w & 0x40000000) != 0)
- { bignum_digits(c)[i] = set_top_bit(w);
- return c;
- }
- bignum_digits(c)[i] = w;
- if (aa == 0 && (w & 0x40000000) == 0) return c;
- /*
- * drop through to extend the number by a word - note that because I
- * am multiplying by a fixnum it is only possible to have to expand by
- * just one word.
- */
- extend_by_one_word:
- if ((lenb & 1) == 0)
- /*
- * Here there was a padder word that I can expand into.
- */
- { bignum_digits(c)[lenb] = aa;
- numhdr(c) += pack_hdrlength(1);
- return c;
- }
- /*
- * Need to allocate more space to grow into.
- */
- push(c);
- a = getvector(TAG_NUMBERS, TYPE_BIGNUM, (lenb<<2)+8);
- pop(c);
- errexit();
- for (i=0; i<lenb; i++)
- bignum_digits(a)[i] = bignum_digits(c)[i];
- bignum_digits(a)[i] = aa;
- bignum_digits(a)[i+1] = 0; /* the padder word */
- return a;
- }
- #ifdef COMMON
- static Lisp_Object timesir(Lisp_Object a, Lisp_Object b)
- /*
- * multiply integer (fixnum or bignum) by ratio.
- */
- {
- Lisp_Object nil = C_nil;
- Lisp_Object w = nil;
- if (a == fixnum_of_int(0)) return a;
- else if (a == fixnum_of_int(1)) return b;
- push3(b, a, nil);
- #define g stack[0]
- #define a stack[-1]
- #define b stack[-2]
- g = gcd(a, denominator(b));
- nil = C_nil;
- if (exception_pending()) goto fail;
- a = quot2(a, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- g = quot2(denominator(b), g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- a = times2(a, numerator(b));
- nil = C_nil;
- if (exception_pending()) goto fail;
- /*
- * make_ratio tidies things up if the denominator was exactly 1
- */
- w = make_ratio(a, g);
- fail:
- popv(3);
- return w;
- #undef a
- #undef b
- #undef g
- }
- static Lisp_Object timesic(Lisp_Object a, Lisp_Object b)
- /*
- * multiply an arbitrary non-complex number by a complex one
- */
- {
- Lisp_Object nil;
- Lisp_Object r = real_part(b), i = imag_part(b);
- push2(a, r);
- i = times2(a, i);
- pop2(r, a);
- errexit();
- push(i);
- r = times2(a, r);
- pop(i);
- errexit();
- return make_complex(r, i);
- }
- #endif
- static Lisp_Object timesif(Lisp_Object a, Lisp_Object b)
- {
- double d = (double)int_of_fixnum(a) * float_of_number(b);
- return make_boxfloat(d, type_of_header(flthdr(b)));
- }
- #ifdef COMMON
- #define timessi(a, b) timesis(b, a)
- static Lisp_Object timessb(Lisp_Object a, Lisp_Object b)
- {
- double d = float_of_number(a) * float_of_number(b);
- return make_sfloat(d);
- }
- #define timessr(a, b) timessb(a, b)
- #define timessc(a, b) timesic(a, b)
- #endif
- static Lisp_Object timessf(Lisp_Object a, Lisp_Object b)
- {
- double d = float_of_number(a) * float_of_number(b);
- return make_boxfloat(d, type_of_header(flthdr(b)));
- }
- #define timesbi(a, b) timesib(b, a)
- #ifdef COMMON
- #define timesbs(a, b) timessb(b, a)
- #endif
- /*
- * Now for bignum multiplication - made more than comfortably complicated
- * by a desire to make it go fast for very big numbers.
- */
- #ifndef KARATSUBA_CUTOFF
- /*
- * I have conducted some experiments on one machine to find out what the
- * best cut-off value here is. The exact value chosen is not very
- * critical, and the fancy techniques do not pay off until numbers get
- * a lot bigger than this length (which is expressed in units of 31-bit
- * words, i.e. about 10 decimals). Anyone who wants may recompile with
- * alternative values to try to get the system fine-tuned for their
- * own computer - but I do not expect it to be possible to achieve much
- * by so doing.
- */
- #define KARATSUBA_CUTOFF 12
- #endif
- static void long_times(unsigned32 *c, unsigned32 *a, unsigned32 *b,
- unsigned32 *d, int32 lena, int32 lenb, int32 lenc);
- static void long_times1(unsigned32 *c, unsigned32 *a, unsigned32 *b,
- unsigned32 *d, int32 lena, int32 lenb, int32 lenc)
- /*
- * Here both a and b are big, with lena <= lenb. Split each into two chunks
- * of size (lenc/4), say (a1,a2) and (b1,b2), and compute each of
- * a1*b1
- * a2*b2
- * (a1+a2)*(b1+b2)
- * where in the last case a1+a2 and b1+b2 can be computed keeping their
- * carry bits as k1 and k2 (so that a1+a2 and b1+b2 are restricted to
- * size lenb). The chunks can then be combined with a few additions and
- * subtractions to form the product that is really wanted. If in fact a
- * was shorter than lenc/4 (so that after a is split up the top half is
- * all zero) I do things in a more straightforward way. I require that on
- * entry to this code lenc<4 < lenb <= lenc/2.
- */
- {
- int32 h = lenc/4; /* lenc must have been made even enough... */
- int32 lena1 = lena - h;
- int32 lenb1 = lenb - h;
- unsigned32 carrya, carryb;
- int32 i;
- /*
- * if the top half of a would be all zero I go through a separate path,
- * doing just two subsidiary multiplications.
- */
- if (lena1 <= 0)
- { long_times(c+h, a, b+h, d, lena, lenb-h, 2*h);
- for (i=0; i<h; i++) c[3*h+i] = 0;
- long_times(d, a, b, c, lena, h, 2*h);
- for (i=0; i<h; i++) c[i] = d[i];
- carrya = 0;
- for (;i<2*h; i++)
- { unsigned32 w = c[i] + d[i] + carrya;
- c[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- for (;carrya!=0;i++)
- { unsigned32 w = c[i] + 1;
- c[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- return;
- }
- /*
- * form (a1+a2) and (b1+b2).
- */
- carrya = 0;
- for (i=0; i<h; i++)
- { unsigned32 w = a[i] + carrya;
- if (i < lena1) w += a[h+i];
- d[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- carryb = 0;
- for (i=0; i<h; i++)
- { unsigned32 w = b[i] + carryb;
- if (i < lenb1) w += b[h+i];
- d[h+i] = clear_top_bit(w);
- carryb = w >> 31;
- }
- long_times(c+h, d, d+h, c, h, h, 2*h);
- /*
- * Adjust to allow for the cases of a1+a2 or b1+b2 overflowing
- * by a single bit.
- */
- c[3*h] = carrya & carryb;
- if (carrya != 0)
- { carrya = 0;
- for (i=0; i<h; i++)
- { unsigned32 w = c[2*h+i] + d[h+i] + carrya;
- c[2*h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- }
- if (carryb != 0)
- { carryb = 0;
- for (i=0; i<h; i++)
- { unsigned32 w = c[2*h+i] + d[i] + carryb;
- c[2*h+i] = clear_top_bit(w);
- carryb = w >> 31;
- }
- }
- c[3*h] += carrya + carryb;
- for (i=1; i<h; i++) c[3*h+i] = 0;
- /*
- * Now (a1+a2)*(b1+b2) should have been computed totally properly
- */
- for (i=0; i<h; i++) d[h+i] = 0;
- /*
- * multiply out a1*b1, where note that a1 and b1 may be less long
- * than h, but not by much.
- */
- long_times(d, a+h, b+h, c, lena-h, lenb-h, 2*h);
- carrya = 0;
- for (i=0; i<2*h; i++)
- { unsigned32 w = c[2*h+i] + d[i] + carrya;
- c[2*h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- carrya = 0;
- for (i=0; i<2*h; i++)
- { unsigned32 w = c[h+i] - d[i] - carrya;
- c[h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- for (; carrya!=0 && i<3*h; i++)
- { unsigned32 w = c[h+i] - 1;
- c[h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- /*
- * multiply out a2*b2
- */
- long_times(d, a, b, c, h, h, 2*h);
- for (i=0; i<h; i++) c[i] = d[i];
- carrya = 0;
- for (; i<2*h; i++)
- { unsigned32 w = c[i] + d[i] + carrya;
- c[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- for (; carrya!=0 && i<4*h; i++)
- { unsigned32 w = c[i] + 1;
- c[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- carrya = 0;
- for (i=0; i<2*h; i++)
- { unsigned32 w = c[h+i] - d[i] - carrya;
- c[h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- for (; carrya!=0 && i<3*h; i++)
- { unsigned32 w = c[h+i] - 1;
- c[h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- /*
- * The product is now complete
- */
- }
- static void long_times2(unsigned32 *c, unsigned32 *a, unsigned32 *b,
- int32 lena, int32 lenb, int32 lenc)
- /*
- * This case is standard old fashioned long multiplication. Dump the
- * result into c.
- */
- {
- int32 i;
- for (i=0; i<lenc; i++) c[i] = 0;
- for (i=0; i<lena; i++)
- { unsigned32 carry = 0, da = a[i];
- int32 j;
- /*
- * When I multiply by (for instance) a high power of 2 there will
- * be plenty of zero digits in the number being worked with, and
- * so the test da!=0 will save something useful.
- */
- if (da != 0)
- { for (j=0; j<lenb; j++)
- { int32 k = i + j;
- Dmultiply(carry, c[k], da, b[j],
- /* NB the addition here is OK and fits into a 32-bit unsigned result */
- carry + c[k]);
- }
- c[i+j] = carry;
- }
- }
- }
- static void long_times(unsigned32 *c, unsigned32 *a, unsigned32 *b,
- unsigned32 *d, int32 lena, int32 lenb, int32 lenc)
- /*
- * This decides if a multiplication is big enough to benefit from
- * decomposition a la Karatsuba.
- * In recursive entries through here out of long_times1() the numbers a
- * and b may have shrunk in ways that mean I need to reconsider the
- * precision to which I am working. This must leave c filled out all
- * the way to lenc, with padding 0s if necessary.
- */
- {
- if (lenb < lena)
- { unsigned32 *t1;
- int32 t2;
- t1 = a; a = b; b = t1;
- t2 = lena; lena = lenb; lenb = t2;
- }
- if (4*lenb <= lenc) /* In this case I should shrink lenc a bit.. */
- { int32 newlenc = (lenb+1)/2;
- int k = 0;
- while (newlenc > KARATSUBA_CUTOFF)
- { newlenc = (newlenc + 1)/2;
- k++;
- }
- while (k != 0)
- { newlenc = 2*newlenc;
- k--;
- }
- newlenc = 4*newlenc;
- while (lenc > newlenc) c[--lenc] = 0;
- }
- if (lena > KARATSUBA_CUTOFF) long_times1(c, a, b, d, lena, lenb, lenc);
- else long_times2(c, a, b, lena, lenb, lenc);
- }
- static Lisp_Object timesbb(Lisp_Object a, Lisp_Object b)
- /*
- * a and b are both guaranteed to be bignums when I call this
- * procedure.
- */
- {
- int sign = 1;
- Lisp_Object c, d, nil;
- int32 lena, lenb, lenc, i;
- lena = (bignum_length(a) >> 2) - 1;
- lenb = (bignum_length(b) >> 2) - 1;
- if (lena == 1 && lenb == 1)
- /*
- * I am going to deem multiplication of two one-word bignums worthy of
- * a special case, since it is probably fairly common and it will be cheap
- * enough that avoiding overheads might matter. I still need to split
- * off the signs, because Imultiply can only deal with 31-bit unsigned values.
- * One-word bignums each have value at least 2^27 (or else they would have
- * been represented as fixnums) so the result here will always be a two-word
- * bignum.
- */
- { int32 va = (int32)bignum_digits(a)[0],
- vb = (int32)bignum_digits(b)[0], vc;
- unsigned32 vclow;
- if (va < 0)
- { if (vb < 0) Dmultiply(vc, vclow, -va, -vb, 0);
- else
- { Dmultiply(vc, vclow, -va, vb, 0);
- if (vclow == 0) vc = -vc;
- else
- { vclow = clear_top_bit(-(int32)vclow);
- vc = ~vc;
- }
- }
- }
- else if (vb < 0)
- { Dmultiply(vc, vclow, va, -vb, 0);
- if (vclow == 0) vc = -vc;
- else
- { vclow = clear_top_bit(-(int32)vclow);
- vc = ~vc;
- }
- }
- else Dmultiply(vc, vclow, va, vb, 0);
- return make_two_word_bignum(vc, vclow);
- }
- /*
- * I take the absolute values of the two input values a and b,
- * recording what the eventual sign for the product will need to be.
- */
- if (((int32)bignum_digits(a)[lena-1]) < 0)
- { sign = -sign;
- push(b);
- /*
- * Negating a negative bignum can sometimes mean that it will
- * have to get longer (because of the twos complement assymmetry),
- * but can never cause it to shrink, In particular it can never lead
- * to demotion to a fixnum, so after this call to negateb it is still
- * OK to assume that a is a bignum.
- */
- a = negateb(a);
- pop(b);
- errexit();
- lena = (bignum_length(a) >> 2) - 1;
- }
- if (((int32)bignum_digits(b)[lenb-1]) < 0)
- { sign = -sign;
- push(a);
- /* see above comments about negateb */
- b = negateb(b);
- pop(a);
- errexit();
- lenb = (bignum_length(b) >> 2) - 1;
- }
- if (lenb < lena) /* Commute so that b is at least as long as a */
- { c = a;
- a = b;
- b = c;
- lenc = lena;
- lena = lenb;
- lenb = lenc;
- }
- push2(a, b);
- /*
- * For very big numbers I have two special actions called for here. First
- * I must round up the size of the result vector to have a big enough power
- * of two as a factor so that (recursive) splitting in two does not cause
- * trouble later. Then I have to allocate some workspace, the size of that
- * being related to the size of the numbers being handled.
- */
- if (lena > KARATSUBA_CUTOFF)
- {
- int k = 0;
- /*
- * I pad lenc up to have a suitably large power of 2 as a factor so
- * that splitting numbers in half works neatly for me.
- */
- lenc = (lenb+1)/2; /* rounded up half-length of longer number */
- while (lenc > KARATSUBA_CUTOFF)
- { lenc = (lenc + 1)/2;
- k++;
- }
- while (k != 0)
- { lenc = 2*lenc;
- k--;
- }
- lenc = 2*lenc;
- c = getvector(TAG_NUMBERS, TYPE_BIGNUM, (1 + 2*lenc) << 2);
- errexitn(2);
- /*
- * The next line seems pretty shameless, but it may reduce the amount of
- * garbage collection I do. When the workspace vector needed is short enough
- * (at present up to 256 bytes) I use the character assembly buffer (boffo)
- * as my workspace, relying on an expectation that bignumber multiplication
- * can never be triggered from places within the reader where that buffer is
- * in use for its normal purpose. I forge tag bits to make boffo look like
- * a number here, but can never trigger garbage collection in a way that
- * might inspect same, so that too is safe at present.
- */
- if (((1 + lenc) << 2) <= (int32)length_of_header(vechdr(boffo)))
- d = (Lisp_Object)((char *)boffo + TAG_NUMBERS - TAG_VECTOR);
- else
- { push(c);
- d = getvector(TAG_NUMBERS, TYPE_BIGNUM, (1 + lenc) << 2);
- pop(c);
- }
- lenc = 2*lenc;
- }
- else
- {
- /*
- * In cases where I will use classical long multiplication there is no
- * need to waste space with extra padding or with the workspace vector d.
- */
- lenc = lena + lenb;
- c = getvector(TAG_NUMBERS, TYPE_BIGNUM, (1 + lenc) << 2);
- d = c; /* set d to avoid dataflow anomaly */
- }
- pop2(b, a);
- errexit();
- { unsigned32 *da = &bignum_digits(a)[0],
- *db = &bignum_digits(b)[0],
- *dc = &bignum_digits(c)[0],
- *dd = &bignum_digits(d)[0];
- long_times(dc, da, db, dd, lena, lenb, lenc);
- }
- /*
- * Here the absolute value of the product has been computed properly.
- * The result can easily have a zero top digit, which will need trimming
- * off. If at least one of the input values was a number which had to
- * be represented with a zero leading digit (e.g. 0x40000000) then the
- * product can have two leading zero digits here. Similarly for negative
- * results. Also padding (to allow splitting numbers into two) can have
- * resulted in extra padding up at the top. lenc gives the size of vector
- * that was allocated, lena+lenb is a much better guess of how much data
- * is active in it.
- */
- errexit();
- { int32 newlenc = lena + lenb;
- /*
- * I tidy up by putting a zero in any padding word above the top of the
- * active data, and by inserting a header in space that gets trimmed off
- * in such a way that the garbage collector will not get upset. This
- * all just roughly trims the numbers - fine adjustment follows.
- */
- if ((newlenc & 1) == 0)
- { bignum_digits(c)[newlenc] = 0;
- if (lenc != newlenc) /* i.e. I padded out somewhat */
- {
- bignum_digits(c)[newlenc+1] = make_bighdr(lenc-newlenc);
- lenc = newlenc;
- numhdr(c) = make_bighdr(lenc+1);
- }
- }
- else if (lenc != newlenc) /* i.e. I padded out somewhat */
- {
- bignum_digits(c)[newlenc] = make_bighdr(lenc-newlenc+1);
- lenc = newlenc;
- numhdr(c) = make_bighdr(lenc+1);
- }
- }
- /*
- * Now I am safe against the garbage collector, and the number c has as
- * its length just lena+lenb, even if it had been padded out earlier.
- */
- if (sign < 0)
- { unsigned32 carry = 0xffffffffU;
- for (i=0; i<lenc-1; i++)
- { carry = clear_top_bit(~bignum_digits(c)[i]) + top_bit(carry);
- bignum_digits(c)[i] = clear_top_bit(carry);
- }
- carry = ~bignum_digits(c)[i] + top_bit(carry);
- if (carry != -1)
- { bignum_digits(c)[i] = carry;
- return c; /* no truncation needed */
- }
- carry = bignum_digits(c)[i-1];
- if ((carry & 0x40000000) == 0)
- { bignum_digits(c)[i] = 0xffffffffU;
- return c; /* no truncation becase of previous digit */
- }
- /*
- * I need to argue that lenc was at least 2, so bignum_digits(c)[i-2]
- * could at worst access the header word of the bignum - but it can never
- * do that because if it were doing so then the bignum product would
- * be about to have a value zero or thereabouts. One-word bignums are not
- * allowed to have leading zero digits.
- */
- if (carry == 0x7fffffff &&
- (bignum_digits(c)[i-2] & 0x40000000) != 0) /* chop 2 */
- { bignum_digits(c)[i-2] |= ~0x7fffffff;
- /*
- * I common up the code to chop off two words from the number at label "chop2"
- */
- goto chop2;
- }
- bignum_digits(c)[i-1] |= ~0x7fffffff;
- /* Drop through to truncate by 1 and sometimes that is easy */
- }
- else
- { unsigned32 w = bignum_digits(c)[lenc-1];
- if (w != 0) return c; /* no truncation */
- w = bignum_digits(c)[lenc-2];
- if ((w & 0x40000000) != 0) return c;
- if (w == 0 &&
- (bignum_digits(c)[lenc-3] & 0x40000000) == 0) goto chop2;
- /* truncate one word */
- }
- /*
- * here the data in the bignum is all correct (even in the most significant
- * digit) but I need to shrink the number by one word. Because of all the
- * doubleword alignment that is used here this can sometimes be done very
- * easily, and other times it involves forging a short bit of dummy data
- * to fill in a gap that gets left in the heap.
- */
- numhdr(c) -= pack_hdrlength(1);
- if ((lenc & 1) != 0) bignum_digits(c)[lenc-1] = 0; /* tidy up */
- else bignum_digits(c)[lenc-1] = make_bighdr(2);
- return c;
- chop2:
- /*
- * Trim two words from the number c
- */
- numhdr(c) -= pack_hdrlength(2);
- lenc -= 2;
- bignum_digits(c)[lenc] = 0;
- lenc |= 1;
- bignum_digits(c)[lenc] = make_bighdr(2);
- return c;
- }
- #ifdef COMMON
- #define timesbr(a, b) timesir(a, b)
- #define timesbc(a, b) timesic(a, b)
- #endif
- #define timesbf(a, b) timessf(a, b)
- #ifdef COMMON
- #define timesri(a, b) timesir(b, a)
- #define timesrs(a, b) timessr(b, a)
- #define timesrb(a, b) timesbr(b, a)
- static Lisp_Object timesrr(Lisp_Object a, Lisp_Object b)
- /*
- * multiply a pair of rational numbers
- */
- {
- Lisp_Object nil = C_nil;
- Lisp_Object w = nil;
- push5(numerator(a), denominator(a),
- numerator(b), denominator(b), nil);
- #define g stack[0]
- #define db stack[-1]
- #define nb stack[-2]
- #define da stack[-3]
- #define na stack[-4]
- g = gcd(na, db);
- nil = C_nil;
- if (exception_pending()) goto fail;
- na = quot2(na, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- db = quot2(db, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- g = gcd(nb, da);
- nil = C_nil;
- if (exception_pending()) goto fail;
- nb = quot2(nb, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- da = quot2(da, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- na = times2(na, nb);
- nil = C_nil;
- if (exception_pending()) goto fail;
- da = times2(da, db);
- nil = C_nil;
- if (exception_pending()) goto fail;
- w = make_ratio(na, da);
- fail:
- popv(5);
- return w;
- #undef g
- #undef db
- #undef nb
- #undef da
- #undef na
- }
- #define timesrc(a, b) timesic(a, b)
- #define timesrf(a, b) timessf(a, b)
- #define timesci(a, b) timesic(b, a)
- #define timescs(a, b) timessc(b, a)
- #define timescb(a, b) timesbc(b, a)
- #define timescr(a, b) timesrc(b, a)
- static Lisp_Object timescc(Lisp_Object a, Lisp_Object b)
- /*
- * multiply a pair of complex values
- */
- {
- Lisp_Object nil = C_nil;
- Lisp_Object w = nil;
- push4(real_part(a), imag_part(a),
- real_part(b), imag_part(b));
- push2(nil, nil);
- #define u stack[0]
- #define v stack[-1]
- #define ib stack[-2]
- #define rb stack[-3]
- #define ia stack[-4]
- #define ra stack[-5]
- u = times2(ra, rb);
- nil = C_nil;
- if (exception_pending()) goto fail;
- v = times2(ia, ib);
- nil = C_nil;
- if (exception_pending()) goto fail;
- v = negate(v);
- nil = C_nil;
- if (exception_pending()) goto fail;
- u = plus2(u, v); /* real part of result */
- nil = C_nil;
- if (exception_pending()) goto fail;
- v = times2(ra, ib);
- nil = C_nil;
- if (exception_pending()) goto fail;
- ib = times2(rb, ia);
- nil = C_nil;
- if (exception_pending()) goto fail;
- v = plus2(v, ib); /* imaginary part */
- nil = C_nil;
- if (exception_pending()) goto fail;
- w = make_complex(u, v);
- fail:
- popv(6);
- return w;
- #undef u
- #undef v
- #undef ib
- #undef rb
- #undef ia
- #undef ra
- }
- #define timescf(a, b) timesci(a, b)
- #endif
- #define timesfi(a, b) timesif(b, a)
- #ifdef COMMON
- #define timesfs(a, b) timessf(b, a)
- #endif
- #define timesfb(a, b) timesbf(b, a)
- #ifdef COMMON
- #define timesfr(a, b) timesrf(b, a)
- #define timesfc(a, b) timescf(b, a)
- #endif
- static Lisp_Object timesff(Lisp_Object a, Lisp_Object b)
- /*
- * multiply boxed floats - see commentary on plusff()
- */
- {
- #ifdef COMMON
- int32 ha = type_of_header(flthdr(a)), hb = type_of_header(flthdr(b));
- #endif
- double d = float_of_number(a) * float_of_number(b);
- #ifdef COMMON
- if (ha == TYPE_LONG_FLOAT || hb == TYPE_LONG_FLOAT)
- ha = TYPE_LONG_FLOAT;
- else if (ha == TYPE_DOUBLE_FLOAT || hb == TYPE_DOUBLE_FLOAT)
- ha = TYPE_DOUBLE_FLOAT;
- else ha = TYPE_SINGLE_FLOAT;
- return make_boxfloat(d, ha);
- #else
- return make_boxfloat(d, TYPE_DOUBLE_FLOAT);
- #endif
- }
- /*
- * ... and now for the dispatch code that copes with general
- * multiplication.
- */
- Lisp_Object times2(Lisp_Object a, Lisp_Object b)
- {
- switch ((int)a & TAG_BITS)
- {
- case TAG_FIXNUM:
- switch ((int)b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesii(a, b);
- #ifdef COMMON
- case TAG_SFLOAT:
- return timesis(a, b);
- #endif
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timesib(a, b);
- #ifdef COMMON
- case TYPE_RATNUM:
- return timesir(a, b);
- case TYPE_COMPLEX_NUM:
- return timesic(a, b);
- #endif
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timesif(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- #ifdef COMMON
- case TAG_SFLOAT:
- switch (b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timessi(a, b);
- case TAG_SFLOAT:
- { Float_union aa, bb; /* timesss() coded in-line */
- aa.i = a - TAG_SFLOAT;
- bb.i = b - TAG_SFLOAT;
- aa.f = (float) (aa.f * bb.f);
- return (aa.i & ~(int32)0xf) + TAG_SFLOAT;
- }
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timessb(a, b);
- case TYPE_RATNUM:
- return timessr(a, b);
- case TYPE_COMPLEX_NUM:
- return timessc(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timessf(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- #endif
- case TAG_NUMBERS:
- { int32 ha = type_of_header(numhdr(a));
- switch (ha)
- {
- case TYPE_BIGNUM:
- switch ((int)b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesbi(a, b);
- #ifdef COMMON
- case TAG_SFLOAT:
- return timesbs(a, b);
- #endif
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timesbb(a, b);
- #ifdef COMMON
- case TYPE_RATNUM:
- return timesbr(a, b);
- case TYPE_COMPLEX_NUM:
- return timesbc(a, b);
- #endif
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timesbf(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- #ifdef COMMON
- case TYPE_RATNUM:
- switch (b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesri(a, b);
- case TAG_SFLOAT:
- return timesrs(a, b);
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timesrb(a, b);
- case TYPE_RATNUM:
- return timesrr(a, b);
- case TYPE_COMPLEX_NUM:
- return timesrc(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timesrf(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- case TYPE_COMPLEX_NUM:
- switch (b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesci(a, b);
- case TAG_SFLOAT:
- return timescs(a, b);
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timescb(a, b);
- case TYPE_RATNUM:
- return timescr(a, b);
- case TYPE_COMPLEX_NUM:
- return timescc(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timescf(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- #endif
- default: return aerror1("bad arg for times", a);
- }
- }
- case TAG_BOXFLOAT:
- switch ((int)b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesfi(a, b);
- #ifdef COMMON
- case TAG_SFLOAT:
- return timesfs(a, b);
- #endif
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timesfb(a, b);
- #ifdef COMMON
- case TYPE_RATNUM:
- return timesfr(a, b);
- case TYPE_COMPLEX_NUM:
- return timesfc(a, b);
- #endif
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timesff(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- default:
- return aerror1("bad arg for times", a);
- }
- }
- /* end of arith02.c */
|