most_frequent_k_chars_distance.sf 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. #!/usr/bin/ruby
  2. #
  3. ## https://rosettacode.org/wiki/Most_frequent_k_chars_distance
  4. #
  5. func _MostFreqKHashing(string, k) {
  6. var seen = Hash()
  7. var chars = string.chars
  8. var freq = chars.freq
  9. var schars = freq.keys.sort_by {|c| -freq{c} }
  10. var mfkh = []
  11. k.times { |i|
  12. chars.each { |c|
  13. seen{c} && next
  14. if (freq{c} == freq{schars[i]}) {
  15. seen{c} = true
  16. mfkh << Hash(c => c, f => freq{c})
  17. break
  18. }
  19. }
  20. }
  21. mfkh << (k-seen.len -> of { Hash(c => :NULL, f => 0) }...)
  22. mfkh
  23. }
  24. func MostFreqKSDF(a, b, k, d) {
  25. var mfkh_a = _MostFreqKHashing(a, k);
  26. var mfkh_b = _MostFreqKHashing(b, k);
  27. d - gather {
  28. mfkh_a.each { |s|
  29. s{:c} == :NULL && next
  30. mfkh_b.each { |t|
  31. s{:c} == t{:c} &&
  32. take(s{:f} + (s{:f} == t{:f} ? 0 : t{:f}))
  33. }
  34. }
  35. }.sum
  36. }
  37. func MostFreqKHashing(string, k) {
  38. gather {
  39. _MostFreqKHashing(string, k).each { |h|
  40. take("%s%d" % (h{:c}, h{:f}))
  41. }
  42. }.join
  43. }
  44. var str1 = "LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV"
  45. var str2 = "EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG"
  46. say "str1 = #{str1.dump}"
  47. say "str2 = #{str2.dump}"
  48. say ''
  49. var h1 = MostFreqKHashing(str1, 2)
  50. var h2 = MostFreqKHashing(str2, 2)
  51. var sdf = MostFreqKSDF(str1, str2, 2, 100)
  52. say("MostFreqKHashing(str1, 2) = ", h1)
  53. say("MostFreqKHashing(str2, 2) = ", h2)
  54. say("MostFreqKSDF(str1, str2, 2, 100) = ", sdf)
  55. assert_eq(h1, "L9T8")
  56. assert_eq(h2, "F9L8")
  57. assert_eq(sdf, 83)
  58. say ''
  59. var arr = [
  60. %w(night nacht),
  61. %w(my a),
  62. %w(research research),
  63. %w(aaaaabbbb ababababa),
  64. %w(significant capabilities),
  65. ]
  66. var k = 2
  67. var limit = 10
  68. for s,t in arr {
  69. "mfkh(%s, %s, #{k}) = [%s, %s] (SDF: %d)\n".printf(
  70. s.dump, t.dump,
  71. MostFreqKHashing(s, k).dump,
  72. MostFreqKHashing(t, k).dump,
  73. MostFreqKSDF(s, t, k, limit),
  74. )
  75. }