uhferret 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
#! /usr/bin/env ruby
# This file is part of uhferret, providing a command-line interface.
#
# Author:: Peter Lane
# Copyright:: Copyright 2012-20, Peter Lane.
# License:: GPLv3
#
# uhferret is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# uhferret is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with uhferret. If not, see <http://www.gnu.org/licenses/>.
  20. require "optparse"
  21. require "uhferret"
  # Command-line driver: parses the options, loads the readable documents
  # into a UHFerret::Ferret instance, runs it, and prints the requested report.
  #
  # Exit status: 0 on success (including --help / --version), 1 when option
  # parsing or argument validation fails.

  # Not referenced anywhere in the visible part of this script; retained
  # unchanged in case other code relies on it.
  MAX_TABLE_SIZE = 100
  VERSION = "1.3.7"

  # ------------------------------------------------------------------
  # Settings controlled by the command-line options, with their defaults.
  document_type = UHFerret::TextDocument
  input_format = :from_argv # recorded by -f, but not consulted below
  output_format = :similarity_table
  definitions_file = nil
  output_full_path = false
  xml_output_file = ""

  options = OptionParser.new do |opts|
    opts.banner = "Usage: uhferret [options] file1 file2 ..."

    # `exit` (rather than `exit!`) is used so that the process reports
    # success and at_exit handlers still run; `exit!` defaults to a
    # failure status.
    opts.on("-h", "--help", "help message") do
      puts options
      exit
    end
    opts.on("-v", "--version", "version") do
      puts "uhferret: version #{VERSION}"
      exit
    end

    # -- document type
    opts.on("-c", "--code", "process documents as code") do
      document_type = UHFerret::CodeDocument
    end
    opts.on("-t", "--text", "process documents as text (default)") do
      document_type = UHFerret::TextDocument
    end

    # -- output format
    opts.on("-p", "--full-path", "output full path") do
      output_full_path = true
    end
    opts.on("-d", "--data-table", "output similarity table (default)") do
      output_format = :similarity_table
    end
    opts.on("-w", "--html-data-table", "output similarity table in html format") do
      output_format = :html_similarity_table
    end
    opts.on("-l", "--list-trigrams", "output trigram list") do
      output_format = :trigram_list
    end
    opts.on("-a", "--all-comparisons", "output list of all comparisons") do
      output_format = :all_comparisons
    end
    opts.on("-x", "--xml-report OUTPUT_FILE", "generate xml report from two documents") do |file|
      output_format = :xml_output
      xml_output_file = file
    end

    # -- file source
    opts.on("-f FILE", "--definition-file FILE", "read document names from file") do |file|
      input_format = :from_file
      definitions_file = file
    end
  end

  begin
    # -- process input options (removes recognised switches from ARGV)
    options.parse!

    # -- check some errors
    if output_format == :xml_output && ARGV.size != 2
      raise ArgumentError, "Error: for xml report, only provide two input filenames"
    end

    # -- add readable files, and run
    ferret = UHFerret::Ferret.new

    if definitions_file
      if File.readable?(definitions_file)
        ferret.add_list_from_file(definitions_file, document_type)
      else
        # Previously ignored silently; report it so the user knows why
        # the documents it names were not loaded.
        warn "Warning: cannot read definition file #{definitions_file}"
      end
    end

    ARGV.each do |filename|
      if File.readable?(filename)
        ferret.add(filename, document_type)
      else
        warn "Warning: skipping unreadable file #{filename}"
      end
    end

    raise ArgumentError, "Error: not enough valid filenames" if ferret.size < 2

    ferret.run

    # -- display output
    case output_format
    when :similarity_table
      ferret.output_similarity_table output_full_path
    when :html_similarity_table
      ferret.output_html_similarity_table
    when :trigram_list
      ferret.output_trigram_list
    when :all_comparisons
      ferret.output_all_comparisons
    when :xml_output
      # Indices 0 and 1 select the first two documents that were added.
      ferret.xml_output(xml_output_file, 0, 1)
    end
  rescue StandardError => err
    # StandardError (not Exception) so that Interrupt and SystemExit
    # propagate normally. Diagnostics go to stderr to keep them out of
    # the report written to stdout.
    warn err.message
    warn options.to_s
    exit 1
  end