chi_squared_test.sf 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. #!/usr/bin/ruby
  2. #
  3. ## https://rosettacode.org/wiki/Verify_distribution_uniformity/Chi-squared_test
  4. #
  5. # Confluent hypergeometric function of the first kind F_1(a;b;z)
  6. func F1(a, b, z, limit=100) {
  7. sum(0..limit, {|k|
  8. rising_factorial(a, k) / rising_factorial(b, k) * z**k / k!
  9. })
  10. }
  11. func γ(a,x) { # lower incomplete gamma function γ(a,x)
  12. #a**(-1) * x**a * F1(a, a+1, -x) # simpler formula
  13. a**(-1) * x**a * exp(-x) * F1(1, a+1, x) # slightly better convergence
  14. }
  15. func P(a,z) { # regularized gamma function P(a,z)
  16. γ(a,z) / Γ(a)
  17. }
  18. func chi_squared_cdf (k, x) {
  19. var f = (k<20 ? 20 : 10)
  20. given(x) {
  21. when (0) { 0 }
  22. case (. < (k + f*sqrt(k))) { P(k/2, x/2) }
  23. else { 1 }
  24. }
  25. }
  26. func chi_squared_test(arr, significance = 0.05) {
  27. var n = arr.len
  28. var N = arr.sum
  29. var expected = N/n
  30. var χ_squared = arr.sum_by {|v| (v-expected)**2 / expected }
  31. var p_value = (1 - chi_squared_cdf(n-1, χ_squared))
  32. [χ_squared, p_value, p_value > significance]
  33. }
  34. [
  35. %n< 199809 200665 199607 200270 199649 >,
  36. %n< 522573 244456 139979 71531 21461 >,
  37. ].each {|dataset|
  38. var r = chi_squared_test(dataset)
  39. say "data: #{dataset}"
  40. say "χ² = #{r[0]}, p-value = #{r[1].round(-4)}, uniform = #{r[2]}\n"
  41. }