#!/usr/bin/ruby

#
## https://rosettacode.org/wiki/Most_frequent_k_chars_distance
#
# Build the "most frequent k" table for `string`.
#
# Returns an array of exactly `k` hashes of the form {c => char, f => count},
# ordered from the most frequent character downwards. Characters that share a
# frequency are emitted in the order of their first appearance in `string`.
# When `string` has fewer than `k` distinct characters, the array is padded
# with {c => "NULL", f => 0} placeholder entries.
func _MostFreqKHashing(string, k) {

    var letters = string.chars
    var count   = letters.freq

    # Distinct characters, most frequent first. Only the frequency found at
    # each rank is used below, so the order among ties does not matter here.
    var ranked = count.keys.sort_by {|ch| -count{ch} }

    var picked = Hash()     # characters already placed in the result
    var table  = []

    for i in ^k {
        # Scan the string in its original order so that ties are resolved
        # by first occurrence rather than by hash ordering.
        for ch in letters {
            if (picked{ch}) {
                next
            }
            if (count{ch} == count{ranked[i]}) {
                picked{ch} = true
                table.push(Hash(c => ch, f => count{ch}))
                break
            }
        }
    }

    # Pad with placeholders up to k entries.
    var missing = (k - picked.len)
    table.push(missing.of { Hash(c => :NULL, f => 0) }...)

    return table
}
# Most-frequent-k string distance between `a` and `b`.
#
# Both strings are reduced to their top-`k` character tables. For every
# character present in both tables, the similarity grows by the shared
# frequency when the two counts are equal, or by the sum of both counts
# otherwise. The result is `d` minus that similarity.
func MostFreqKSDF(a, b, k, d) {

    var left  = _MostFreqKHashing(a, k)
    var right = _MostFreqKHashing(b, k)

    var similarity = 0

    for s in left {

        # Placeholder entries never contribute.
        if (s{:c} == :NULL) {
            next
        }

        for t in right {
            if (s{:c} == t{:c}) {
                similarity += (s{:f} == t{:f} ? s{:f} : (s{:f} + t{:f}))
            }
        }
    }

    return (d - similarity)
}
# String form of the top-`k` character table of `string`: each entry is
# rendered as the character followed by its count, and the pieces are
# concatenated (e.g. "L9T8"). Placeholder entries render as "NULL0".
func MostFreqKHashing(string, k) {
    var pieces = _MostFreqKHashing(string, k).map { |entry|
        "%s%d" % (entry{:c}, entry{:f})
    }
    return pieces.join
}
# --- Demo 1: the two long sequences from the task description ---

var str1 = "LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV"
var str2 = "EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG"

say("str1 = #{str1.dump}")
say("str2 = #{str2.dump}")
say('')

# Compute all three results first, then report them.
var h1  = MostFreqKHashing(str1, 2)
var h2  = MostFreqKHashing(str2, 2)
var sdf = MostFreqKSDF(str1, str2, 2, 100)

say("MostFreqKHashing(str1, 2) = ", h1)
say("MostFreqKHashing(str2, 2) = ", h2)
say("MostFreqKSDF(str1, str2, 2, 100) = ", sdf)

# Pin the expected values for this input.
assert_eq(h1, "L9T8")
assert_eq(h2, "F9L8")
assert_eq(sdf, 83)
# --- Demo 2: a table of short word pairs ---

say('')

var arr = [
    %w(night nacht),
    %w(my a),
    %w(research research),
    %w(aaaaabbbb ababababa),
    %w(significant capabilities),
]

var k     = 2       # number of most-frequent characters to keep
var limit = 10      # maximum distance passed to MostFreqKSDF

for pair in arr {
    var (s, t) = pair...

    var hash_s   = MostFreqKHashing(s, k)
    var hash_t   = MostFreqKHashing(t, k)
    var distance = MostFreqKSDF(s, t, k, limit)

    "mfkh(%s, %s, #{k}) = [%s, %s] (SDF: %d)\n".printf(
        s.dump, t.dump, hash_s.dump, hash_t.dump, distance
    )
}
|