#! /usr/bin/env ruby
# This file is part of uhferret, providing a command-line interface.
#
# Author:: Peter Lane
# Copyright:: Copyright 2012-20, Peter Lane.
# License:: GPLv3
#
# uhferret is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# uhferret is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with uhferret. If not, see <http://www.gnu.org/licenses/>.
require "optparse"
require "uhferret"

# ------------------------------------------------------------------
# Constants

# Version string printed by the -v/--version switch.
VERSION = "1.3.7"

# Not referenced in the visible part of this script.
# NOTE(review): presumably an upper bound on the similarity table size --
# confirm before relying on it.
MAX_TABLE_SIZE = 100

# ------------------------------------------------------------------
# Option defaults; the command-line switches overwrite these.

# Document class handed to Ferret for every input file (text unless -c given).
document_type = UHFerret::TextDocument

# Which report is produced once the comparison has run.
output_format = :similarity_table

# Passed to the plain similarity table report (set by -p/--full-path).
output_full_path = false

# Destination of the xml report; only used when -x is given.
xml_output_file = ""

# Where document names come from. Switched to :from_file by -f, but not read
# anywhere else in the visible script.
input_format = :from_argv

# File listing the documents to load (set by -f), or nil when not supplied.
definitions_file = nil
# Command-line option parser for uhferret.
#
# Each switch handler stores the user's choice in one of the option variables
# (document_type, output_format, output_full_path, xml_output_file,
# input_format, definitions_file). The -h and -v switches print their text
# and terminate the process immediately with a success status.
options = OptionParser.new do |opts|
  opts.banner = "Usage: uhferret [options] file1 file2 ..."

  # exit! ends the process at once: it bypasses exit handlers and any
  # enclosing rescue clause. It also skips the normal flush of Ruby's
  # buffered output, so stdout must be flushed by hand or the text just
  # printed is lost when stdout is a pipe or a file. A bare exit! reports
  # failure, so success is requested explicitly.
  finish = lambda do
    $stdout.flush
    exit!(true)
  end

  opts.on("-h", "--help", "help message") do |_|
    puts opts
    finish.call
  end
  opts.on("-v", "--version", "version") do |_|
    puts "uhferret: version #{VERSION}"
    finish.call
  end

  # -- document type
  opts.on("-c", "--code", "process documents as code") do |_|
    document_type = UHFerret::CodeDocument
  end
  opts.on("-t", "--text", "process documents as text (default)") do |_|
    document_type = UHFerret::TextDocument
  end

  # -- output format
  opts.on("-p", "--full-path", "output full path") do |_|
    output_full_path = true
  end
  opts.on("-d", "--data-table", "output similarity table (default)") do |_|
    output_format = :similarity_table
  end
  opts.on("-w", "--html-data-table", "output similarity table in html format") do |_|
    output_format = :html_similarity_table
  end
  opts.on("-l", "--list-trigrams", "output trigram list") do |_|
    output_format = :trigram_list
  end
  opts.on("-a", "--all-comparisons", "output list of all comparisons") do |_|
    output_format = :all_comparisons
  end
  opts.on("-x", "--xml-report OUTPUT_FILE", "generate xml report from two documents") do |file|
    output_format = :xml_output
    xml_output_file = file
  end

  # -- file source
  opts.on("-f FILE", "--definition-file FILE", "read document names from file") do |file|
    input_format = :from_file
    definitions_file = file
  end
end
# Main entry point: parse the command line, load the documents into Ferret,
# run the comparison and print the report selected by output_format.
#
# Usage and runtime errors (StandardError and subclasses, which includes
# OptionParser's parse errors) are reported as the error message followed by
# the usage summary, and the process exits with status 1. Signals, exit
# requests and other non-StandardError exceptions are left to propagate.
begin
  # -- process input options (switches are removed from ARGV, leaving filenames)
  options.parse!

  # -- check some errors
  if output_format == :xml_output && ARGV.size != 2
    raise ArgumentError, "Error: for xml report, only provide two input filenames"
  end

  # -- add readable files, and run
  ferret = UHFerret::Ferret.new
  if definitions_file
    if File.readable?(definitions_file)
      ferret.add_list_from_file(definitions_file, document_type)
    else
      # Diagnostics go to stderr so the report on stdout is unaffected.
      warn "Warning: cannot read definition file #{definitions_file}"
    end
  end
  ARGV.each do |filename|
    if File.readable?(filename)
      ferret.add(filename, document_type)
    else
      warn "Warning: skipping unreadable file #{filename}"
    end
  end
  raise ArgumentError, "Error: not enough valid filenames" if ferret.size < 2

  ferret.run

  # -- display output
  case output_format
  when :similarity_table
    ferret.output_similarity_table(output_full_path)
  when :html_similarity_table
    ferret.output_html_similarity_table
  when :trigram_list
    ferret.output_trigram_list
  when :all_comparisons
    ferret.output_all_comparisons
  when :xml_output
    # NOTE(review): 0 and 1 presumably index the first two documents added to
    # ferret; if a definition file was also given those may not be the two
    # files named on the command line -- confirm against Ferret#xml_output.
    ferret.xml_output(xml_output_file, 0, 1)
  end
rescue StandardError => err
  puts err.message
  puts options
  exit 1
end