gsl_cdf__hypergeometric.c 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. /* cdf/hypergeometric.c
  2. *
  3. * Copyright (C) 2004 Jason H. Stover.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 3 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
  18. */
  19. /*
  20. * Computes the cumulative distribution function for a hypergeometric
  21. * random variable. A hypergeometric random variable X is the number
  22. * of elements of type 1 in a sample of size t, drawn from a population
  23. * of size n1 + n2, in which n1 are of type 1 and n2 are of type 2.
  24. *
  25. * This algorithm computes Pr( X <= k ) by summing the terms from
  26. * the mass function, Pr( X = k ).
  27. *
  28. * References:
  29. *
  30. * T. Wu. An accurate computation of the hypergeometric distribution
  31. * function. ACM Transactions on Mathematical Software. Volume 19, number 1,
  32. * March 1993.
  33. * This algorithm is not used, since it requires factoring the
  34. * numerator and denominator, then cancelling. It is more accurate
  35. * than the algorithm used here, but the cancellation requires more
  36. * time than the algorithm used here.
  37. *
  38. * W. Feller. An Introduction to Probability Theory and Its Applications,
  39. * third edition. 1968. Chapter 2, section 6.
  40. */
  41. #include "gsl__config.h"
  42. #include <math.h>
  43. #include "gsl_math.h"
  44. #include "gsl_errno.h"
  45. #include "gsl_cdf.h"
  46. #include "gsl_randist.h"
  47. #include "gsl_cdf__error.h"
  48. static double
  49. lower_tail (const unsigned int k, const unsigned int n1,
  50. const unsigned int n2, const unsigned int t)
  51. {
  52. double relerr;
  53. int i = k;
  54. double s, P;
  55. s = gsl_ran_hypergeometric_pdf (i, n1, n2, t);
  56. P = s;
  57. while (i > 0)
  58. {
  59. double factor =
  60. (i / (n1 - i + 1.0)) * ((n2 + i - t) / (t - i + 1.0));
  61. s *= factor;
  62. P += s;
  63. relerr = s / P;
  64. if (relerr < GSL_DBL_EPSILON)
  65. break;
  66. i--;
  67. }
  68. return P;
  69. }
  70. static double
  71. upper_tail (const unsigned int k, const unsigned int n1,
  72. const unsigned int n2, const unsigned int t)
  73. {
  74. double relerr;
  75. unsigned int i = k + 1;
  76. double s, Q;
  77. s = gsl_ran_hypergeometric_pdf (i, n1, n2, t);
  78. Q = s;
  79. while (i < t)
  80. {
  81. double factor =
  82. ((n1 - i) / (i + 1.0)) * ((t - i) / (n2 + i + 1.0 - t));
  83. s *= factor;
  84. Q += s;
  85. relerr = s / Q;
  86. if (relerr < GSL_DBL_EPSILON)
  87. break;
  88. i++;
  89. }
  90. return Q;
  91. }
  92. /*
  93. * Pr (X <= k)
  94. */
  95. double
  96. gsl_cdf_hypergeometric_P (const unsigned int k,
  97. const unsigned int n1,
  98. const unsigned int n2, const unsigned int t)
  99. {
  100. double P;
  101. if (t > (n1 + n2))
  102. {
  103. CDF_ERROR ("t larger than population size", GSL_EDOM);
  104. }
  105. else if (k >= n1 || k >= t)
  106. {
  107. P = 1.0;
  108. }
  109. else if (k < 0.0)
  110. {
  111. P = 0.0;
  112. }
  113. else
  114. {
  115. double midpoint = (int) (t * n1 / (n1 + n2));
  116. if (k >= midpoint)
  117. {
  118. P = 1 - upper_tail (k, n1, n2, t);
  119. }
  120. else
  121. {
  122. P = lower_tail (k, n1, n2, t);
  123. }
  124. }
  125. return P;
  126. }
  127. /*
  128. * Pr (X > k)
  129. */
  130. double
  131. gsl_cdf_hypergeometric_Q (const unsigned int k,
  132. const unsigned int n1,
  133. const unsigned int n2, const unsigned int t)
  134. {
  135. double Q;
  136. if (t > (n1 + n2))
  137. {
  138. CDF_ERROR ("t larger than population size", GSL_EDOM);
  139. }
  140. else if (k >= n1 || k >= t)
  141. {
  142. Q = 0.0;
  143. }
  144. else if (k < 0.0)
  145. {
  146. Q = 1.0;
  147. }
  148. else
  149. {
  150. double midpoint = (int) (t * n1 / (n1 + n2));
  151. if (k < midpoint)
  152. {
  153. Q = 1 - lower_tail (k, n1, n2, t);
  154. }
  155. else
  156. {
  157. Q = upper_tail (k, n1, n2, t);
  158. }
  159. }
  160. return Q;
  161. }