123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311 |
- /* arith02.c Copyright (C) 1990-2002 Codemist Ltd */
- /*
- * Arithmetic functions.
- * Multiplication of generic numbers
- * and in particular a lot of bignum support.
- *
- */
- /*
- * This code may be used and modified, and redistributed in binary
- * or source form, subject to the "CCL Public License", which should
- * accompany it. This license is a variant on the BSD license, and thus
- * permits use of code derived from this in either open and commercial
- * projects: but it does require that updates to this code be made
- * available back to the originators of the package.
- * Before merging other code in with this or linking this code
- * with other packages or libraries please check that the license terms
- * of the other material are compatible with those of this.
- */
- /* Signature: 1884c4f0 08-Apr-2002 */
- #include <stdarg.h>
- #include <string.h>
- #include <ctype.h>
- #include <math.h>
- #ifdef __WATCOMC__
- #include <float.h>
- #endif
- #include "machine.h"
- #include "tags.h"
- #include "cslerror.h"
- #include "externs.h"
- #include "arith.h"
- #ifdef TIMEOUT
- #include "timeout.h"
- #endif
- /*
- * Now for multiplication
- */
- /*
- * I provide symbols IMULTIPLY and IDIVIDE which can be asserted if the
- * corresponding routines have been provided elsewhere (e.g. in machine
- * code for extra speed)
- */
- #ifndef IMULTIPLY
- #ifdef MULDIV64
- /*
- * If MULDIV64 is asserted this function in not in fact needed
- * since a macro in arith.h arranges that the multiplication is done
- * in-line. However this version is left here in the source code as
- * convenient documentation of what the function needs to do.
- */
-
- unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c)
- /*
- * (result, *rlow) = a*b + c as 62-bit product
- *
- * rlow may well be the same as one of a, b or c so the
- * assignment to *rlow must not be done until the calculation is
- * complete. Inputs a and b are 31-bit values (i.e. they have
- * their top bit zero and are to be treated as positive value.
- * c may use all 32 bits, but again is treated as unsigned.
- * The result is computed and the low 31 bits placed in rlow,
- * (the top bit of rlow will end up zero) and the top part
- * of the result is returned.
- */
- {
- /* NB the value r cound be int64 or unsigned64 - it does not matter */
- unsigned64 r = (unsigned64)a*(unsigned64)b + (unsigned64)c;
- *rlow = (unsigned32)(r & 0x7fffffff);
- return (unsigned32)(r >> 31);
- }
- #else
- #ifdef OLDER_VERSION
- unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c)
- /*
- * (result, *rlow) = a*b + c as 62-bit product
- *
- * The code given here forms the produce by doing four single-precision
- * (16*16->32) multiplies and using shifts etc to glue the partial results
- * together.
- */
- {
- unsigned32 ah = a >> 16, bh = b >> 16, ch = c >> 16;
- unsigned32 w0, w1, w2, w3, w4;
- a -= ah << 16;
- b -= bh << 16;
- c -= ch << 16; /* a, b and c now split into high/low parts */
- w0 = a*b; /* One... */
- w1 = w0 >> 16;
- w0 = w0 - (w1 << 16) + c;
- w1 += ch;
- w2 = a*bh; /* Two... */
- w3 = w2 >> 16;
- w1 += w2 - (w3 << 16);
- w2 = ah*b; /* Three... */
- w4 = w2 >> 16;
- w1 += w2 - (w4 << 16);
- w2 = w0 >> 16;
- w1 += w2;
- w0 -= w2 << 16;
- w2 = ah*bh + w3 + w4; /* Four 16-bit multiplies done in all. */
- w2 += w1 >> 16;
- /* Here I do a minor shift to split the result at bit 31 rather than 32 */
- *rlow = w0 + ((w1 & 0x7fff) << 16);
- return (w2<<1) + ((w1>>15) & 1);
- }
- #else
- unsigned32 Imultiply(unsigned32 *rlow, unsigned32 a, unsigned32 b, unsigned32 c)
- /*
- * (result, *rlow) = a*b + c as 62-bit product
- *
- * The code given here forms the produce by doing three single-precision
- * (16*16->32) multiplies and using shifts etc to glue the partial results
- * together. This is slightly faster than the above (maybe simpler?) code
- * on at least some machines.
- */
- {
- unsigned32 ah, bh;
- unsigned32 w0, w1, w2;
- ah = a >> 16;
- /*
- * On some machines I know that multi-bit shifts are especially painful,
- * while on others it is nasty to access the literal value needed as a
- * mask here. Hence I make some show of providing an alternative.
- */
- #ifdef FAST_SHIFTS
- a -= ah << 16;
- #else
- a &= 0xffff;
- #endif
- bh = b >> 16;
- #ifdef FAST_SHIFTS
- b -= bh << 16;
- #else
- b &= 0xffff;
- #endif
- /*
- * At present I can not see any way of issuing any of these multiplies
- * any earlier, or of doing useful work between the times that I launch
- * them, or even delaying before I use their results. This will be rather
- * sad on machines where multiplication could overlap with other operations.
- */
- w2 = ah*bh;
- w1 = (a + ah)*(b + bh);
- w0 = a*b;
- /*
- * The largest exact result that can be computed by the next line (given
- * that a and b start off just 31 bit) is 0xfffd0002
- */
- w1 = (w1 - w2) - w0; /* == (a*bh + b*ah) */
- /*
- * I split into 30 bits in the lower word and 32 in the upper, so I have
- * 2 bits available for temporary carry effects in the lower part.
- */
- w2 = (w2 << 2) + (w1 >> 14) + (w0 >> 30) + (c >> 30);
- w1 &= 0x3fff;
- w0 = (c & 0x3fffffff) + (w0 & 0x3fffffffU) + (w1 << 16);
- w0 += ((w2 & 1) << 30);
- w2 = (w2 >> 1) + (w0 >> 31);
- *rlow = w0 & 0x7fffffffU;
- return w2;
- }
- #endif
- #endif
- #endif /* IMULTIPLY */
- static Lisp_Object timesii(Lisp_Object a, Lisp_Object b)
- /*
- * multiplying two fixnums together is much messier than adding them,
- * mainly because the result can easily be a two-word bignum
- */
- {
- unsigned32 aa = (unsigned32)int_of_fixnum(a),
- bb = (unsigned32)int_of_fixnum(b);
- unsigned32 temp, low, high;
- /*
- * Multiplication by 0 or by -1 is just possibly common enough to be worth
- * filtering out the following special cases. Avoidance of the tedious
- * checks for overflow may make this useful even if Imultiply is very fast.
- */
- if (aa <= 1)
- { if (aa == 0) return fixnum_of_int(0);
- else return b;
- }
- else if (bb <= 1)
- { if (bb == 0) return fixnum_of_int(0);
- else return a;
- }
- /*
- * I dump the low part of the product in temp then copy to a variable low
- * because temp has to have its address taken and so is not a candidate for
- * living in a register.
- */
- Dmultiply(high, temp, clear_top_bit(aa), clear_top_bit(bb), 0);
- /*
- * The result handed back here has only 31 bits active in its low part.
- */
- low = temp;
- /*
- * The next two lines convert the unsigned product produced by Imultiply
- * into a signed product.
- */
- if ((int32)aa < 0) high -= bb;
- if ((int32)bb < 0) high -= aa;
- if ((high & 0x40000000) == 0) high = clear_top_bit(high);
- else high = set_top_bit(high);
- if (high == 0 && (low & 0x40000000) == 0)
- { /* one word positive result */
- if (low <= 0x07ffffff) return fixnum_of_int(low);
- else return make_one_word_bignum(low);
- }
- else if (high == -1 && (low & 0x40000000) != 0)
- { /* one word negative result */
- low = set_top_bit(low);
- if ((low & fix_mask) == fix_mask) return fixnum_of_int(low);
- else return make_one_word_bignum(low);
- }
- else return make_two_word_bignum(high, low);
- }
- #ifdef COMMON
- static Lisp_Object timesis(Lisp_Object a, Lisp_Object b)
- {
- Float_union bb;
- bb.i = b - TAG_SFLOAT;
- bb.f = (float) ((float)int_of_fixnum(a) * bb.f);
- return (bb.i & ~(int32)0xf) + TAG_SFLOAT;
- }
- #endif
- static Lisp_Object timesib(Lisp_Object a, Lisp_Object b)
- {
- int32 aa = int_of_fixnum(a), lenb, i;
- unsigned32 carry, ms_dig, w;
- Lisp_Object c, nil;
- /*
- * I will split off the (easy) cases of the fixnum being -1, 0 or 1.
- */
- if (aa == 0) return fixnum_of_int(0);
- else if (aa == 1) return b;
- else if (aa == -1) return negateb(b);
- lenb = bignum_length(b);
- push(b);
- c = getvector(TAG_NUMBERS, TYPE_BIGNUM, lenb);
- pop(b);
- errexit();
- lenb = (lenb-CELL)/4;
- if (aa < 0)
- { aa = -aa;
- carry = 0xffffffffU;
- for (i=0; i<lenb-1; i++)
- { carry = clear_top_bit(~bignum_digits(b)[i]) + top_bit(carry);
- bignum_digits(c)[i] = clear_top_bit(carry);
- }
- /*
- * I do the most significant digit separately.
- */
- carry = clear_top_bit(~bignum_digits(b)[i]) + top_bit(carry);
- /*
- * there is a special case needed here - if b started off as a number
- * like 0xc0000000,0,0,0 then negating it would call for extension of
- * the bignum c (this is the usual assymetry in the range of twos-
- * complement numbers). But if I detect that case specially I can
- * observe that the ONLY case where negation overflows is where the
- * negated value is exactly a power of 2 such .. an easy thing to
- * multiply a by! Furthermore the power of two involved is known to
- * by 30 mod 31.
- */
- if (carry == 0x40000000)
- { bignum_digits(c)[i] = (aa & 1) << 30;
- aa = aa >> 1;
- goto extend_by_one_word;
- }
- if ((carry & 0x40000000) != 0) carry = set_top_bit(carry);
- bignum_digits(c)[i] = carry;
- }
- else
- { for (i=0; i<lenb; i++) bignum_digits(c)[i] = bignum_digits(b)[i];
- }
- /*
- * Now c is a copy of b (negated if necessary) and I just want to
- * multiply it by the positive value a. This is the heart of the
- * procedure. Re-write Imultiply in assembly code if you want it
- * to go faster. See the top of this file for a portable sample
- * implementation of Imultiply, which gives back a result as a pair
- * of 31-bit values. But note that if the computer has been marked as
- * supporting a 64-bit integer data-type then Imultiply macro-expands
- * into code that uses 64-bit intermediate values.
- */
- carry = 0;
- /*
- * here aa is > 0, and a fortiori the 0x80000000 bit of aa is clear,
- * so I do not have to worry about the difference between 31 and 32
- * bit values for aa.
- */
- for (i=0; i<lenb-1; i++)
- Dmultiply(carry, bignum_digits(c)[i], bignum_digits(c)[i],
- (unsigned32)aa, carry);
- ms_dig = bignum_digits(c)[i];
- Dmultiply(carry, w, clear_top_bit(ms_dig), (unsigned32)aa, carry);
- /*
- * After forming the product to (lenb) digits I need to see if there
- * is any overflow. Calculate what the next digit would be, sign
- * extending into the 0x80000000 bit as necessary.
- */
- if ((carry & 0x40000000) != 0) carry = set_top_bit(carry);
- if (((int32)ms_dig) < 0) carry -= aa;
- aa = (int32)carry;
- if (aa == -1 && (w & 0x40000000) != 0)
- { bignum_digits(c)[i] = set_top_bit(w);
- return c;
- }
- bignum_digits(c)[i] = w;
- if (aa == 0 && (w & 0x40000000) == 0) return c;
- /*
- * drop through to extend the number by a word - note that because I
- * am multiplying by a fixnum it is only possible to have to expand by
- * just one word.
- */
- extend_by_one_word:
- #ifdef ADDRESS_64
- if ((lenb & 1) != 0)
- #else
- if ((lenb & 1) == 0)
- #endif
- /*
- * Here there was a padder word that I can expand into.
- */
- { bignum_digits(c)[lenb] = aa;
- numhdr(c) += pack_hdrlength(1);
- return c;
- }
- /*
- * Need to allocate more space to grow into. I need to grow by just 4 bytes.
- */
- push(c);
- a = getvector(TAG_NUMBERS, TYPE_BIGNUM, CELL+4+4*lenb);
- pop(c);
- errexit();
- for (i=0; i<lenb; i++)
- bignum_digits(a)[i] = bignum_digits(c)[i];
- bignum_digits(a)[i] = aa;
- bignum_digits(a)[i+1] = 0; /* the padder word */
- return a;
- }
- #ifdef COMMON
- static Lisp_Object timesir(Lisp_Object a, Lisp_Object b)
- /*
- * multiply integer (fixnum or bignum) by ratio.
- */
- {
- Lisp_Object nil = C_nil;
- Lisp_Object w = nil;
- if (a == fixnum_of_int(0)) return a;
- else if (a == fixnum_of_int(1)) return b;
- push3(b, a, nil);
- #define g stack[0]
- #define a stack[-1]
- #define b stack[-2]
- g = gcd(a, denominator(b));
- nil = C_nil;
- if (exception_pending()) goto fail;
- a = quot2(a, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- g = quot2(denominator(b), g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- a = times2(a, numerator(b));
- nil = C_nil;
- if (exception_pending()) goto fail;
- /*
- * make_ratio tidies things up if the denominator was exactly 1
- */
- w = make_ratio(a, g);
- fail:
- popv(3);
- return w;
- #undef a
- #undef b
- #undef g
- }
- static Lisp_Object timesic(Lisp_Object a, Lisp_Object b)
- /*
- * multiply an arbitrary non-complex number by a complex one
- */
- {
- Lisp_Object nil;
- Lisp_Object r = real_part(b), i = imag_part(b);
- push2(a, r);
- i = times2(a, i);
- pop2(r, a);
- errexit();
- push(i);
- r = times2(a, r);
- pop(i);
- errexit();
- return make_complex(r, i);
- }
- #endif
- static Lisp_Object timesif(Lisp_Object a, Lisp_Object b)
- {
- double d = (double)int_of_fixnum(a) * float_of_number(b);
- return make_boxfloat(d, type_of_header(flthdr(b)));
- }
- #ifdef COMMON
- #define timessi(a, b) timesis(b, a)
- static Lisp_Object timessb(Lisp_Object a, Lisp_Object b)
- {
- double d = float_of_number(a) * float_of_number(b);
- return make_sfloat(d);
- }
- #define timessr(a, b) timessb(a, b)
- #define timessc(a, b) timesic(a, b)
- #endif
- static Lisp_Object timessf(Lisp_Object a, Lisp_Object b)
- {
- double d = float_of_number(a) * float_of_number(b);
- return make_boxfloat(d, type_of_header(flthdr(b)));
- }
- #define timesbi(a, b) timesib(b, a)
- #ifdef COMMON
- #define timesbs(a, b) timessb(b, a)
- #endif
- /*
- * Now for bignum multiplication - made more than comfortably complicated
- * by a desire to make it go fast for very big numbers.
- */
- #ifndef KARATSUBA_CUTOFF
- /*
- * I have conducted some experiments on one machine to find out what the
- * best cut-off value here is. The exact value chosen is not very
- * critical, and the fancy techniques do not pay off until numbers get
- * a lot bigger than this length (which is expressed in units of 31-bit
- * words, i.e. about 10 decimals). Anyone who wants may recompile with
- * alternative values to try to get the system fine-tuned for their
- * own computer - but I do not expect it to be possible to achieve much
- * by so doing.
- */
- #define KARATSUBA_CUTOFF 12
- #endif
- static void long_times(unsigned32 *c, unsigned32 *a, unsigned32 *b,
- unsigned32 *d, int32 lena, int32 lenb, int32 lenc);
- static void long_times1(unsigned32 *c, unsigned32 *a, unsigned32 *b,
- unsigned32 *d, int32 lena, int32 lenb, int32 lenc)
- /*
- * Here both a and b are big, with lena <= lenb. Split each into two chunks
- * of size (lenc/4), say (a1,a2) and (b1,b2), and compute each of
- * a1*b1
- * a2*b2
- * (a1+a2)*(b1+b2)
- * where in the last case a1+a2 and b1+b2 can be computed keeping their
- * carry bits as k1 and k2 (so that a1+a2 and b1+b2 are restricted to
- * size lenb). The chunks can then be combined with a few additions and
- * subtractions to form the product that is really wanted. If in fact a
- * was shorter than lenc/4 (so that after a is split up the top half is
- * all zero) I do things in a more straightforward way. I require that on
- * entry to this code lenc<4 < lenb <= lenc/2.
- */
- {
- int32 h = lenc/4; /* lenc must have been made even enough... */
- int32 lena1 = lena - h;
- int32 lenb1 = lenb - h;
- unsigned32 carrya, carryb;
- int32 i;
- /*
- * if the top half of a would be all zero I go through a separate path,
- * doing just two subsidiary multiplications.
- */
- if (lena1 <= 0)
- { long_times(c+h, a, b+h, d, lena, lenb-h, 2*h);
- for (i=0; i<h; i++) c[3*h+i] = 0;
- long_times(d, a, b, c, lena, h, 2*h);
- for (i=0; i<h; i++) c[i] = d[i];
- carrya = 0;
- for (;i<2*h; i++)
- { unsigned32 w = c[i] + d[i] + carrya;
- c[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- for (;carrya!=0;i++)
- { unsigned32 w = c[i] + 1;
- c[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- return;
- }
- /*
- * form (a1+a2) and (b1+b2).
- */
- carrya = 0;
- for (i=0; i<h; i++)
- { unsigned32 w = a[i] + carrya;
- if (i < lena1) w += a[h+i];
- d[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- carryb = 0;
- for (i=0; i<h; i++)
- { unsigned32 w = b[i] + carryb;
- if (i < lenb1) w += b[h+i];
- d[h+i] = clear_top_bit(w);
- carryb = w >> 31;
- }
- long_times(c+h, d, d+h, c, h, h, 2*h);
- /*
- * Adjust to allow for the cases of a1+a2 or b1+b2 overflowing
- * by a single bit.
- */
- c[3*h] = carrya & carryb;
- if (carrya != 0)
- { carrya = 0;
- for (i=0; i<h; i++)
- { unsigned32 w = c[2*h+i] + d[h+i] + carrya;
- c[2*h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- }
- if (carryb != 0)
- { carryb = 0;
- for (i=0; i<h; i++)
- { unsigned32 w = c[2*h+i] + d[i] + carryb;
- c[2*h+i] = clear_top_bit(w);
- carryb = w >> 31;
- }
- }
- c[3*h] += carrya + carryb;
- for (i=1; i<h; i++) c[3*h+i] = 0;
- /*
- * Now (a1+a2)*(b1+b2) should have been computed totally properly
- */
- for (i=0; i<h; i++) d[h+i] = 0;
- /*
- * multiply out a1*b1, where note that a1 and b1 may be less long
- * than h, but not by much.
- */
- long_times(d, a+h, b+h, c, lena-h, lenb-h, 2*h);
- carrya = 0;
- for (i=0; i<2*h; i++)
- { unsigned32 w = c[2*h+i] + d[i] + carrya;
- c[2*h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- carrya = 0;
- for (i=0; i<2*h; i++)
- { unsigned32 w = c[h+i] - d[i] - carrya;
- c[h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- for (; carrya!=0 && i<3*h; i++)
- { unsigned32 w = c[h+i] - 1;
- c[h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- /*
- * multiply out a2*b2
- */
- long_times(d, a, b, c, h, h, 2*h);
- for (i=0; i<h; i++) c[i] = d[i];
- carrya = 0;
- for (; i<2*h; i++)
- { unsigned32 w = c[i] + d[i] + carrya;
- c[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- for (; carrya!=0 && i<4*h; i++)
- { unsigned32 w = c[i] + 1;
- c[i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- carrya = 0;
- for (i=0; i<2*h; i++)
- { unsigned32 w = c[h+i] - d[i] - carrya;
- c[h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- for (; carrya!=0 && i<3*h; i++)
- { unsigned32 w = c[h+i] - 1;
- c[h+i] = clear_top_bit(w);
- carrya = w >> 31;
- }
- /*
- * The product is now complete
- */
- }
- static void long_times2(unsigned32 *c, unsigned32 *a, unsigned32 *b,
- int32 lena, int32 lenb, int32 lenc)
- /*
- * This case is standard old fashioned long multiplication. Dump the
- * result into c.
- */
- {
- int32 i;
- for (i=0; i<lenc; i++) c[i] = 0;
- for (i=0; i<lena; i++)
- { unsigned32 carry = 0, da = a[i];
- int32 j;
- /*
- * When I multiply by (for instance) a high power of 2 there will
- * be plenty of zero digits in the number being worked with, and
- * so the test da!=0 will save something useful.
- */
- if (da != 0)
- { for (j=0; j<lenb; j++)
- { int32 k = i + j;
- Dmultiply(carry, c[k], da, b[j],
- /* NB the addition here is OK and fits into a 32-bit unsigned result */
- carry + c[k]);
- }
- c[i+j] = carry;
- }
- }
- }
- static void long_times(unsigned32 *c, unsigned32 *a, unsigned32 *b,
- unsigned32 *d, int32 lena, int32 lenb, int32 lenc)
- /*
- * This decides if a multiplication is big enough to benefit from
- * decomposition a la Karatsuba.
- * In recursive entries through here out of long_times1() the numbers a
- * and b may have shrunk in ways that mean I need to reconsider the
- * precision to which I am working. This must leave c filled out all
- * the way to lenc, with padding 0s if necessary.
- */
- {
- if (lenb < lena)
- { unsigned32 *t1;
- int32 t2;
- t1 = a; a = b; b = t1;
- t2 = lena; lena = lenb; lenb = t2;
- }
- if (4*lenb <= lenc) /* In this case I should shrink lenc a bit.. */
- { int32 newlenc = (lenb+1)/2;
- int k = 0;
- while (newlenc > KARATSUBA_CUTOFF)
- { newlenc = (newlenc + 1)/2;
- k++;
- }
- while (k != 0)
- { newlenc = 2*newlenc;
- k--;
- }
- newlenc = 4*newlenc;
- while (lenc > newlenc) c[--lenc] = 0;
- }
- if (lena > KARATSUBA_CUTOFF) long_times1(c, a, b, d, lena, lenb, lenc);
- else long_times2(c, a, b, lena, lenb, lenc);
- }
- static Lisp_Object timesbb(Lisp_Object a, Lisp_Object b)
- /*
- * a and b are both guaranteed to be bignums when I call this
- * procedure.
- */
- {
- int sign = 1;
- Lisp_Object c, d, nil;
- int32 lena, lenb, lenc, i;
- lena = (bignum_length(a) - CELL)/4;
- lenb = (bignum_length(b) - CELL)/4;
- if (lena == 1 && lenb == 1)
- /*
- * I am going to deem multiplication of two one-word bignums worthy of
- * a special case, since it is probably fairly common and it will be cheap
- * enough that avoiding overheads might matter. I still need to split
- * off the signs, because Imultiply can only deal with 31-bit unsigned values.
- * One-word bignums each have value at least 2^27 (or else they would have
- * been represented as fixnums) so the result here will always be a two-word
- * bignum.
- */
- { int32 va = (int32)bignum_digits(a)[0],
- vb = (int32)bignum_digits(b)[0], vc;
- unsigned32 vclow;
- if (va < 0)
- { if (vb < 0) Dmultiply(vc, vclow, -va, -vb, 0);
- else
- { Dmultiply(vc, vclow, -va, vb, 0);
- if (vclow == 0) vc = -vc;
- else
- { vclow = clear_top_bit(-(int32)vclow);
- vc = ~vc;
- }
- }
- }
- else if (vb < 0)
- { Dmultiply(vc, vclow, va, -vb, 0);
- if (vclow == 0) vc = -vc;
- else
- { vclow = clear_top_bit(-(int32)vclow);
- vc = ~vc;
- }
- }
- else Dmultiply(vc, vclow, va, vb, 0);
- return make_two_word_bignum(vc, vclow);
- }
- /*
- * I take the absolute values of the two input values a and b,
- * recording what the eventual sign for the product will need to be.
- */
- if (((int32)bignum_digits(a)[lena-1]) < 0)
- { sign = -sign;
- push(b);
- /*
- * Negating a negative bignum can sometimes mean that it will
- * have to get longer (because of the twos complement assymmetry),
- * but can never cause it to shrink, In particular it can never lead
- * to demotion to a fixnum, so after this call to negateb it is still
- * OK to assume that a is a bignum.
- */
- a = negateb(a);
- pop(b);
- errexit();
- lena = (bignum_length(a) - CELL)/4;
- }
- if (((int32)bignum_digits(b)[lenb-1]) < 0)
- { sign = -sign;
- push(a);
- /* see above comments about negateb */
- b = negateb(b);
- pop(a);
- errexit();
- lenb = (bignum_length(b) - CELL)/4;
- }
- if (lenb < lena) /* Commute so that b is at least as long as a */
- { c = a;
- a = b;
- b = c;
- lenc = lena;
- lena = lenb;
- lenb = lenc;
- }
- push2(a, b);
- /*
- * For very big numbers I have two special actions called for here. First
- * I must round up the size of the result vector to have a big enough power
- * of two as a factor so that (recursive) splitting in two does not cause
- * trouble later. Then I have to allocate some workspace, the size of that
- * being related to the size of the numbers being handled.
- */
- if (lena > KARATSUBA_CUTOFF)
- {
- int k = 0;
- /*
- * I pad lenc up to have a suitably large power of 2 as a factor so
- * that splitting numbers in half works neatly for me.
- */
- lenc = (lenb+1)/2; /* rounded up half-length of longer number */
- while (lenc > KARATSUBA_CUTOFF)
- { lenc = (lenc + 1)/2;
- k++;
- }
- while (k != 0)
- { lenc = 2*lenc;
- k--;
- }
- lenc = 2*lenc;
- c = getvector(TAG_NUMBERS, TYPE_BIGNUM, CELL+8*lenc);
- errexitn(2);
- /*
- * The next line seems pretty shameless, but it may reduce the amount of
- * garbage collection I do. When the workspace vector needed is short enough
- * (at present up to 256 bytes) I use the character assembly buffer (boffo)
- * as my workspace, relying on an expectation that bignumber multiplication
- * can never be triggered from places within the reader where that buffer is
- * in use for its normal purpose. I forge tag bits to make boffo look like
- * a number here, but can never trigger garbage collection in a way that
- * might inspect same, so that too is safe at present.
- */
- if ((4*lenc+CELL) <= (intxx)length_of_header(vechdr(boffo)))
- d = (Lisp_Object)((char *)boffo + TAG_NUMBERS - TAG_VECTOR);
- else
- { push(c);
- d = getvector(TAG_NUMBERS, TYPE_BIGNUM, CELL+4*lenc);
- pop(c);
- }
- lenc = 2*lenc;
- }
- else
- {
- /*
- * In cases where I will use classical long multiplication there is no
- * need to waste space with extra padding or with the workspace vector d.
- */
- lenc = lena + lenb;
- c = getvector(TAG_NUMBERS, TYPE_BIGNUM, CELL+4*lenc);
- d = c; /* set d to avoid dataflow anomaly */
- }
- pop2(b, a);
- errexit();
- { unsigned32 *da = &bignum_digits(a)[0],
- *db = &bignum_digits(b)[0],
- *dc = &bignum_digits(c)[0],
- *dd = &bignum_digits(d)[0];
- long_times(dc, da, db, dd, lena, lenb, lenc);
- }
- /*
- * Here the absolute value of the product has been computed properly.
- * The result can easily have a zero top digit, which will need trimming
- * off. If at least one of the input values was a number which had to
- * be represented with a zero leading digit (e.g. 0x40000000) then the
- * product can have two leading zero digits here. Similarly for negative
- * results. Also padding (to allow splitting numbers into two) can have
- * resulted in extra padding up at the top. lenc gives the size of vector
- * that was allocated, lena+lenb is a much better guess of how much data
- * is active in it.
- */
- errexit();
- { int32 newlenc = lena + lenb;
- /*
- * I tidy up by putting a zero in any padding word above the top of the
- * active data, and by inserting a header in space that gets trimmed off
- * in such a way that the garbage collector will not get upset. This
- * all just roughly trims the numbers - fine adjustment follows.
- */
- #ifdef ADDRESS_64
- if ((newlenc & 1) != 0)
- #else
- if ((newlenc & 1) == 0)
- #endif
- { bignum_digits(c)[newlenc] = 0;
- if (lenc != newlenc) /* i.e. I padded out somewhat */
- {
- *(Header *)&bignum_digits(c)[newlenc+1] =
- make_bighdr(lenc-newlenc-1);
- lenc = newlenc;
- numhdr(c) = make_bighdr(lenc+CELL/4);
- }
- }
- else if (lenc != newlenc) /* i.e. I padded out somewhat */
- {
- *(Header *)&bignum_digits(c)[newlenc] =
- make_bighdr(lenc-newlenc);
- lenc = newlenc;
- numhdr(c) = make_bighdr(lenc+CELL/4);
- }
- }
- /*
- * Now I am safe against the garbage collector, and the number c has as
- * its length just lena+lenb, even if it had been padded out earlier.
- */
- if (sign < 0)
- { unsigned32 carry = 0xffffffffU;
- for (i=0; i<lenc-1; i++)
- { carry = clear_top_bit(~bignum_digits(c)[i]) + top_bit(carry);
- bignum_digits(c)[i] = clear_top_bit(carry);
- }
- carry = ~bignum_digits(c)[i] + top_bit(carry);
- if (carry != -1)
- { bignum_digits(c)[i] = carry;
- return c; /* no truncation needed */
- }
- carry = bignum_digits(c)[i-1];
- if ((carry & 0x40000000) == 0)
- { bignum_digits(c)[i] = 0xffffffffU;
- return c; /* no truncation becase of previous digit */
- }
- /*
- * I need to argue that lenc was at least 2, so bignum_digits(c)[i-2]
- * could at worst access the header word of the bignum - but it can never
- * do that because if it were doing so then the bignum product would
- * be about to have a value zero or thereabouts. One-word bignums are not
- * allowed to have leading zero digits.
- */
- if (carry == 0x7fffffff &&
- (bignum_digits(c)[i-2] & 0x40000000) != 0) /* chop 2 */
- { bignum_digits(c)[i-2] |= ~0x7fffffff;
- /*
- * I common up the code to chop off two words from the number at label "chop2"
- */
- goto chop2;
- }
- bignum_digits(c)[i-1] |= ~0x7fffffff;
- /* Drop through to truncate by 1 and sometimes that is easy */
- }
- else
- { unsigned32 w = bignum_digits(c)[lenc-1];
- if (w != 0) return c; /* no truncation */
- w = bignum_digits(c)[lenc-2];
- if ((w & 0x40000000) != 0) return c;
- if (w == 0 &&
- (bignum_digits(c)[lenc-3] & 0x40000000) == 0) goto chop2;
- /* truncate one word */
- }
- /*
- * here the data in the bignum is all correct (even in the most significant
- * digit) but I need to shrink the number by one word. Because of all the
- * doubleword alignment that is used here this can sometimes be done very
- * easily, and other times it involves forging a short bit of dummy data
- * to fill in a gap that gets left in the heap.
- */
- numhdr(c) -= pack_hdrlength(1);
- #ifdef ADDRESS_64
- if ((lenc & 1) == 0)
- #else
- if ((lenc & 1) != 0)
- #endif
- bignum_digits(c)[lenc-1] = 0; /* tidy up */
- else bignum_digits(c)[lenc-1] = make_bighdr(2);
- return c;
- chop2:
- /*
- * Trim two words from the number c
- */
- numhdr(c) -= pack_hdrlength(2);
- lenc -= 2;
- bignum_digits(c)[lenc] = 0;
- #ifdef ADDRESS_64
- lenc = (lenc + 1) & ~1;
- #else
- lenc |= 1;
- #endif
- *(Header *)&bignum_digits(c)[lenc] = make_bighdr(2);
- return c;
- }
- #ifdef COMMON
- #define timesbr(a, b) timesir(a, b)
- #define timesbc(a, b) timesic(a, b)
- #endif
- #define timesbf(a, b) timessf(a, b)
- #ifdef COMMON
- #define timesri(a, b) timesir(b, a)
- #define timesrs(a, b) timessr(b, a)
- #define timesrb(a, b) timesbr(b, a)
- static Lisp_Object timesrr(Lisp_Object a, Lisp_Object b)
- /*
- * multiply a pair of rational numbers
- */
- {
- Lisp_Object nil = C_nil;
- Lisp_Object w = nil;
- push5(numerator(a), denominator(a),
- numerator(b), denominator(b), nil);
- #define g stack[0]
- #define db stack[-1]
- #define nb stack[-2]
- #define da stack[-3]
- #define na stack[-4]
- g = gcd(na, db);
- nil = C_nil;
- if (exception_pending()) goto fail;
- na = quot2(na, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- db = quot2(db, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- g = gcd(nb, da);
- nil = C_nil;
- if (exception_pending()) goto fail;
- nb = quot2(nb, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- da = quot2(da, g);
- nil = C_nil;
- if (exception_pending()) goto fail;
- na = times2(na, nb);
- nil = C_nil;
- if (exception_pending()) goto fail;
- da = times2(da, db);
- nil = C_nil;
- if (exception_pending()) goto fail;
- w = make_ratio(na, da);
- fail:
- popv(5);
- return w;
- #undef g
- #undef db
- #undef nb
- #undef da
- #undef na
- }
- #define timesrc(a, b) timesic(a, b)
- #define timesrf(a, b) timessf(a, b)
- #define timesci(a, b) timesic(b, a)
- #define timescs(a, b) timessc(b, a)
- #define timescb(a, b) timesbc(b, a)
- #define timescr(a, b) timesrc(b, a)
- static Lisp_Object timescc(Lisp_Object a, Lisp_Object b)
- /*
- * multiply a pair of complex values
- */
- {
- Lisp_Object nil = C_nil;
- Lisp_Object w = nil;
- push4(real_part(a), imag_part(a),
- real_part(b), imag_part(b));
- push2(nil, nil);
- #define u stack[0]
- #define v stack[-1]
- #define ib stack[-2]
- #define rb stack[-3]
- #define ia stack[-4]
- #define ra stack[-5]
- u = times2(ra, rb);
- nil = C_nil;
- if (exception_pending()) goto fail;
- v = times2(ia, ib);
- nil = C_nil;
- if (exception_pending()) goto fail;
- v = negate(v);
- nil = C_nil;
- if (exception_pending()) goto fail;
- u = plus2(u, v); /* real part of result */
- nil = C_nil;
- if (exception_pending()) goto fail;
- v = times2(ra, ib);
- nil = C_nil;
- if (exception_pending()) goto fail;
- ib = times2(rb, ia);
- nil = C_nil;
- if (exception_pending()) goto fail;
- v = plus2(v, ib); /* imaginary part */
- nil = C_nil;
- if (exception_pending()) goto fail;
- w = make_complex(u, v);
- fail:
- popv(6);
- return w;
- #undef u
- #undef v
- #undef ib
- #undef rb
- #undef ia
- #undef ra
- }
- #define timescf(a, b) timesci(a, b)
- #endif
- #define timesfi(a, b) timesif(b, a)
- #ifdef COMMON
- #define timesfs(a, b) timessf(b, a)
- #endif
- #define timesfb(a, b) timesbf(b, a)
- #ifdef COMMON
- #define timesfr(a, b) timesrf(b, a)
- #define timesfc(a, b) timescf(b, a)
- #endif
- static Lisp_Object timesff(Lisp_Object a, Lisp_Object b)
- /*
- * multiply boxed floats - see commentary on plusff()
- */
- {
- #ifdef COMMON
- int32 ha = type_of_header(flthdr(a)), hb = type_of_header(flthdr(b));
- #endif
- double d = float_of_number(a) * float_of_number(b);
- #ifdef COMMON
- if (ha == TYPE_LONG_FLOAT || hb == TYPE_LONG_FLOAT)
- ha = TYPE_LONG_FLOAT;
- else if (ha == TYPE_DOUBLE_FLOAT || hb == TYPE_DOUBLE_FLOAT)
- ha = TYPE_DOUBLE_FLOAT;
- else ha = TYPE_SINGLE_FLOAT;
- return make_boxfloat(d, ha);
- #else
- return make_boxfloat(d, TYPE_DOUBLE_FLOAT);
- #endif
- }
- /*
- * ... and now for the dispatch code that copes with general
- * multiplication.
- */
- Lisp_Object times2(Lisp_Object a, Lisp_Object b)
- {
- switch ((int)a & TAG_BITS)
- {
- case TAG_FIXNUM:
- switch ((int)b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesii(a, b);
- #ifdef COMMON
- case TAG_SFLOAT:
- return timesis(a, b);
- #endif
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timesib(a, b);
- #ifdef COMMON
- case TYPE_RATNUM:
- return timesir(a, b);
- case TYPE_COMPLEX_NUM:
- return timesic(a, b);
- #endif
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timesif(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- #ifdef COMMON
- case TAG_SFLOAT:
- switch (b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timessi(a, b);
- case TAG_SFLOAT:
- { Float_union aa, bb; /* timesss() coded in-line */
- aa.i = a - TAG_SFLOAT;
- bb.i = b - TAG_SFLOAT;
- aa.f = (float) (aa.f * bb.f);
- return (aa.i & ~(int32)0xf) + TAG_SFLOAT;
- }
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timessb(a, b);
- case TYPE_RATNUM:
- return timessr(a, b);
- case TYPE_COMPLEX_NUM:
- return timessc(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timessf(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- #endif
- case TAG_NUMBERS:
- { int32 ha = type_of_header(numhdr(a));
- switch (ha)
- {
- case TYPE_BIGNUM:
- switch ((int)b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesbi(a, b);
- #ifdef COMMON
- case TAG_SFLOAT:
- return timesbs(a, b);
- #endif
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timesbb(a, b);
- #ifdef COMMON
- case TYPE_RATNUM:
- return timesbr(a, b);
- case TYPE_COMPLEX_NUM:
- return timesbc(a, b);
- #endif
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timesbf(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- #ifdef COMMON
- case TYPE_RATNUM:
- switch (b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesri(a, b);
- case TAG_SFLOAT:
- return timesrs(a, b);
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timesrb(a, b);
- case TYPE_RATNUM:
- return timesrr(a, b);
- case TYPE_COMPLEX_NUM:
- return timesrc(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timesrf(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- case TYPE_COMPLEX_NUM:
- switch (b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesci(a, b);
- case TAG_SFLOAT:
- return timescs(a, b);
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timescb(a, b);
- case TYPE_RATNUM:
- return timescr(a, b);
- case TYPE_COMPLEX_NUM:
- return timescc(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timescf(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- #endif
- default: return aerror1("bad arg for times", a);
- }
- }
- case TAG_BOXFLOAT:
- switch ((int)b & TAG_BITS)
- {
- case TAG_FIXNUM:
- return timesfi(a, b);
- #ifdef COMMON
- case TAG_SFLOAT:
- return timesfs(a, b);
- #endif
- case TAG_NUMBERS:
- { int32 hb = type_of_header(numhdr(b));
- switch (hb)
- {
- case TYPE_BIGNUM:
- return timesfb(a, b);
- #ifdef COMMON
- case TYPE_RATNUM:
- return timesfr(a, b);
- case TYPE_COMPLEX_NUM:
- return timesfc(a, b);
- #endif
- default:
- return aerror1("bad arg for times", b);
- }
- }
- case TAG_BOXFLOAT:
- return timesff(a, b);
- default:
- return aerror1("bad arg for times", b);
- }
- default:
- return aerror1("bad arg for times", a);
- }
- }
- /* end of arith02.c */
|