qdivrem.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. /*-
  2. * Copyright (c) 1992, 1993
  3. * The Regents of the University of California. All rights reserved.
  4. *
  5. * This software was developed by the Computer Systems Engineering group
  6. * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
  7. * contributed to Berkeley.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions
  11. * are met:
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in the
  16. * documentation and/or other materials provided with the distribution.
  17. * 3. Neither the name of the University nor the names of its contributors
  18. * may be used to endorse or promote products derived from this software
  19. * without specific prior written permission.
  20. *
  21. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31. * SUCH DAMAGE.
  32. */
  33. /*
  34. * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
  35. * section 4.3.1, pp. 257--259.
  36. */
  37. #include "quad.h"
  38. #define B ((int)1 << HALF_BITS) /* digit base */
  39. /* Combine two `digits' to make a single two-digit number. */
  40. #define COMBINE(a, b) (((u_int)(a) << HALF_BITS) | (b))
  41. /* select a type for digits in base B: use unsigned short if they fit */
  42. #if UINT_MAX == 0xffffffffU && USHRT_MAX >= 0xffff
  43. typedef unsigned short digit;
  44. #else
  45. typedef u_int digit;
  46. #endif
  47. static void shl(digit *p, int len, int sh);
  48. /*
  49. * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
  50. *
  51. * We do this in base 2-sup-HALF_BITS, so that all intermediate products
  52. * fit within u_int. As a consequence, the maximum length dividend and
  53. * divisor are 4 `digits' in this base (they are shorter if they have
  54. * leading zeros).
  55. */
  56. u_quad_t
  57. __qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq)
  58. {
  59. union uu tmp;
  60. digit *u, *v, *q;
  61. digit v1, v2;
  62. u_int qhat, rhat, t;
  63. int m, n, d, j, i;
  64. digit uspace[5], vspace[5], qspace[5];
  65. /*
  66. * Take care of special cases: divide by zero, and u < v.
  67. */
  68. if (vq == 0) {
  69. /* divide by zero. */
  70. static volatile const unsigned int zero = 0;
  71. tmp.ul[H] = tmp.ul[L] = 1 / zero;
  72. if (arq)
  73. *arq = uq;
  74. return (tmp.q);
  75. }
  76. if (uq < vq) {
  77. if (arq)
  78. *arq = uq;
  79. return (0);
  80. }
  81. u = &uspace[0];
  82. v = &vspace[0];
  83. q = &qspace[0];
  84. /*
  85. * Break dividend and divisor into digits in base B, then
  86. * count leading zeros to determine m and n. When done, we
  87. * will have:
  88. * u = (u[1]u[2]...u[m+n]) sub B
  89. * v = (v[1]v[2]...v[n]) sub B
  90. * v[1] != 0
  91. * 1 < n <= 4 (if n = 1, we use a different division algorithm)
  92. * m >= 0 (otherwise u < v, which we already checked)
  93. * m + n = 4
  94. * and thus
  95. * m = 4 - n <= 2
  96. */
  97. tmp.uq = uq;
  98. u[0] = 0;
  99. u[1] = (digit)HHALF(tmp.ul[H]);
  100. u[2] = (digit)LHALF(tmp.ul[H]);
  101. u[3] = (digit)HHALF(tmp.ul[L]);
  102. u[4] = (digit)LHALF(tmp.ul[L]);
  103. tmp.uq = vq;
  104. v[1] = (digit)HHALF(tmp.ul[H]);
  105. v[2] = (digit)LHALF(tmp.ul[H]);
  106. v[3] = (digit)HHALF(tmp.ul[L]);
  107. v[4] = (digit)LHALF(tmp.ul[L]);
  108. for (n = 4; v[1] == 0; v++) {
  109. if (--n == 1) {
  110. u_int rbj; /* r*B+u[j] (not root boy jim) */
  111. digit q1, q2, q3, q4;
  112. /*
  113. * Change of plan, per exercise 16.
  114. * r = 0;
  115. * for j = 1..4:
  116. * q[j] = floor((r*B + u[j]) / v),
  117. * r = (r*B + u[j]) % v;
  118. * We unroll this completely here.
  119. */
  120. t = v[2]; /* nonzero, by definition */
  121. q1 = (digit)(u[1] / t);
  122. rbj = COMBINE(u[1] % t, u[2]);
  123. q2 = (digit)(rbj / t);
  124. rbj = COMBINE(rbj % t, u[3]);
  125. q3 = (digit)(rbj / t);
  126. rbj = COMBINE(rbj % t, u[4]);
  127. q4 = (digit)(rbj / t);
  128. if (arq)
  129. *arq = rbj % t;
  130. tmp.ul[H] = COMBINE(q1, q2);
  131. tmp.ul[L] = COMBINE(q3, q4);
  132. return (tmp.q);
  133. }
  134. }
  135. /*
  136. * By adjusting q once we determine m, we can guarantee that
  137. * there is a complete four-digit quotient at &qspace[1] when
  138. * we finally stop.
  139. */
  140. for (m = 4 - n; u[1] == 0; u++)
  141. m--;
  142. for (i = 4 - m; --i >= 0;)
  143. q[i] = 0;
  144. q += 4 - m;
  145. /*
  146. * Here we run Program D, translated from MIX to C and acquiring
  147. * a few minor changes.
  148. *
  149. * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
  150. */
  151. d = 0;
  152. for (t = v[1]; t < B / 2; t <<= 1)
  153. d++;
  154. if (d > 0) {
  155. shl(&u[0], m + n, d); /* u <<= d */
  156. shl(&v[1], n - 1, d); /* v <<= d */
  157. }
  158. /*
  159. * D2: j = 0.
  160. */
  161. j = 0;
  162. v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
  163. v2 = v[2]; /* for D3 */
  164. do {
  165. digit uj0, uj1, uj2;
  166. /*
  167. * D3: Calculate qhat (\^q, in TeX notation).
  168. * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
  169. * let rhat = (u[j]*B + u[j+1]) mod v[1].
  170. * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
  171. * decrement qhat and increase rhat correspondingly.
  172. * Note that if rhat >= B, v[2]*qhat < rhat*B.
  173. */
  174. uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
  175. uj1 = u[j + 1]; /* for D3 only */
  176. uj2 = u[j + 2]; /* for D3 only */
  177. if (uj0 == v1) {
  178. qhat = B;
  179. rhat = uj1;
  180. goto qhat_too_big;
  181. } else {
  182. u_int nn = COMBINE(uj0, uj1);
  183. qhat = nn / v1;
  184. rhat = nn % v1;
  185. }
  186. while (v2 * qhat > COMBINE(rhat, uj2)) {
  187. qhat_too_big:
  188. qhat--;
  189. if ((rhat += v1) >= B)
  190. break;
  191. }
  192. /*
  193. * D4: Multiply and subtract.
  194. * The variable `t' holds any borrows across the loop.
  195. * We split this up so that we do not require v[0] = 0,
  196. * and to eliminate a final special case.
  197. */
  198. for (t = 0, i = n; i > 0; i--) {
  199. t = u[i + j] - v[i] * qhat - t;
  200. u[i + j] = (digit)LHALF(t);
  201. t = (B - HHALF(t)) & (B - 1);
  202. }
  203. t = u[j] - t;
  204. u[j] = (digit)LHALF(t);
  205. /*
  206. * D5: test remainder.
  207. * There is a borrow if and only if HHALF(t) is nonzero;
  208. * in that (rare) case, qhat was too large (by exactly 1).
  209. * Fix it by adding v[1..n] to u[j..j+n].
  210. */
  211. if (HHALF(t)) {
  212. qhat--;
  213. for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
  214. t += u[i + j] + v[i];
  215. u[i + j] = (digit)LHALF(t);
  216. t = HHALF(t);
  217. }
  218. u[j] = (digit)LHALF(u[j] + t);
  219. }
  220. q[j] = (digit)qhat;
  221. } while (++j <= m); /* D7: loop on j. */
  222. /*
  223. * If caller wants the remainder, we have to calculate it as
  224. * u[m..m+n] >> d (this is at most n digits and thus fits in
  225. * u[m+1..m+n], but we may need more source digits).
  226. */
  227. if (arq) {
  228. if (d) {
  229. for (i = m + n; i > m; --i)
  230. u[i] = (digit)(((u_int)u[i] >> d) |
  231. LHALF((u_int)u[i - 1] << (HALF_BITS - d)));
  232. u[i] = 0;
  233. }
  234. tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
  235. tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
  236. *arq = tmp.q;
  237. }
  238. tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
  239. tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
  240. return (tmp.q);
  241. }
  242. /*
  243. * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
  244. * `fall out' the left (there never will be any such anyway).
  245. * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
  246. */
  247. static void
  248. shl(digit *p, int len, int sh)
  249. {
  250. int i;
  251. for (i = 0; i < len; i++)
  252. p[i] = (digit)(LHALF((u_int)p[i] << sh) |
  253. ((u_int)p[i + 1] >> (HALF_BITS - sh)));
  254. p[i] = (digit)(LHALF((u_int)p[i] << sh));
  255. }