# example-1.rb (1.1 KB)

  1. # example-1
  2. # Illustrates creating an instance of uhferret and
  3. # finding the similarity of some sample documents.
  4. require "uhferret"
  5. # Construct an instance of ferret and add some documents to it.
  6. ferret = UHFerret::Ferret.new
  7. ferret.add "text-eg/ruby.txt"
  8. ferret.add "text-eg/cobra.txt"
  9. ferret.add "text-eg/fantom.txt"
  10. # Run ferret to compute similarities.
  11. # This step can take a long time, if there are many documents.
  12. ferret.run
  13. # Print out information for every document.
  14. ferret.each do |doc|
  15. puts "Document name: #{doc.filename}"
  16. end
  17. # Print out some information for every pair of documents.
  18. ferret.each_pair do |i, j|
  19. puts "For document #{i} - #{ferret[i].filename}"
  20. puts "and document #{j} - #{ferret[j].filename}"
  21. puts "Resemblance is #{ferret.resemblance(i, j)}"
  22. puts "Containment of #{i} in #{j} is #{ferret.containment(i, j)}"
  23. puts "Containment of #{j} in #{i} is #{ferret.containment(j, i)}"
  24. puts "Doc #{i} has #{ferret.trigram_count(i)} trigrams, \
  25. doc #{j} has #{ferret.trigram_count(j)} trigrams, and \
  26. they share #{ferret.trigram_matches(i, j)} trigrams"
  27. end