module SvmToolkit

  #
  # Evaluator provides a base class and factory methods for
  # constructing classes that evaluate the performance of a model
  # against a dataset. Different evaluators measure different kinds
  # of performance.
  #
  # Evaluators are classes which provide the methods:
  #
  # * add_result(actual, prediction), called for every instance during evaluation
  # * value, to retrieve a measure of performance
  #
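  # As a sketch of the intended protocol (hypothetical usage: the
  # +model+ and +dataset+ objects, and their +predict+ and +label+
  # methods, stand in for whatever the surrounding toolkit supplies):
  #
  #   evaluator = OverallAccuracy.new
  #   dataset.each do |instance|
  #     evaluator.add_result(instance.label, model.predict(instance))
  #   end
  #   puts evaluator            # e.g. "Overall accuracy: 50.0%"
  #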
  class Evaluator

    # Constructs an evaluation class for the given label.
    # Stores the precision performance of the model against
    # the given label. Precision is the proportion of
    # correct responses out of all the instances assigned
    # this label. A high precision means the model is
    # mostly correct when it assigns an instance to this
    # class.
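    #
    # A minimal sketch of intended use (the label +1+ is assumed
    # here purely for illustration):
    #
    #   precision = Evaluator.ClassPrecision(1).new
    #   precision.add_result(1, 1)    # actual 1, predicted 1: correct
    #   precision.add_result(-1, 1)   # actual -1, predicted 1: incorrect
    #   precision.value               #=> (1/2)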
    def Evaluator.ClassPrecision label
      Class.new(Evaluator) do
        define_method(:label) { label } # capture the label for this class
        def initialize # :nodoc:
          @num_correct = 0
          @num_predicted = 0
        end
        # Precision counts the instances *assigned* this label,
        # so results are recorded when the prediction matches.
        def add_result(actual, prediction) # :nodoc:
          if prediction == label
            @num_predicted += 1
            @num_correct += 1 if actual == prediction
          end
        end
        def value # :nodoc:
          if @num_predicted.zero?
            0.0
          else
            @num_correct.quo @num_predicted
          end
        end
        def to_s # :nodoc:
          "Precision for label #{label}: #{value}"
        end
      end
    end

    # Constructs an evaluation class for the given label.
    # Stores the recall performance of the model against the
    # given label. Recall is the proportion of correct
    # responses out of all the instances with this label.
    # A high recall means that nearly all the actual members
    # of this class are identified.
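    #
    # A minimal sketch of intended use (again assuming the label +1+
    # for illustration):
    #
    #   recall = Evaluator.ClassRecall(1).new
    #   recall.add_result(1, 1)    # actual 1, predicted 1: found
    #   recall.add_result(1, -1)   # actual 1, predicted -1: missed
    #   recall.value               #=> (1/2)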
    def Evaluator.ClassRecall label
      Class.new(Evaluator) do
        define_method(:label) { label } # capture the label for this class
        def initialize # :nodoc:
          @num_correct = 0
          @num_relevant = 0
        end
        # Recall counts the instances *actually in* this class,
        # so results are recorded when the actual label matches.
        def add_result(actual, prediction) # :nodoc:
          if actual == label
            @num_relevant += 1
            @num_correct += 1 if actual == prediction
          end
        end
        def value # :nodoc:
          if @num_relevant.zero?
            0.0
          else
            @num_correct.quo @num_relevant
          end
        end
        def to_s # :nodoc:
          "Recall for label #{label}: #{value}"
        end
      end
    end

    # Computes the Matthews correlation coefficient of the model.
    # The Matthews correlation coefficient measures the agreement
    # between the actual and predicted binary classifications:
    #
    #   MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
    #
    # More information is available at:
    # http://en.wikipedia.org/wiki/Matthews_correlation_coefficient
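    #
    # A minimal sketch of intended use (positive label +1+ assumed
    # for illustration):
    #
    #   mcc = Evaluator.MatthewsCorrelationCoefficient(1).new
    #   mcc.add_result(1, 1)     # a true positive
    #   mcc.add_result(-1, -1)   # a true negative
    #   mcc.value                #=> 1.0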
    def Evaluator.MatthewsCorrelationCoefficient positive_label
      Class.new(Evaluator) do
        define_method(:positive_label) { positive_label } # capture the label
        def initialize # :nodoc:
          @true_positives = 0
          @true_negatives = 0
          @false_positives = 0
          @false_negatives = 0
        end
        # Keeps a separate count of TP, FP, TN, FN.
        def add_result(actual, prediction) # :nodoc:
          case [actual == positive_label, prediction == positive_label]
          when [true, true]
            @true_positives += 1
          when [true, false]
            @false_negatives += 1
          when [false, false]
            @true_negatives += 1
          when [false, true]
            @false_positives += 1
          end
        end
        # Returns the Matthews correlation coefficient,
        # or 0.0 if any of the marginal counts is zero.
        def value # :nodoc:
          denominator = Math.sqrt(
            (@true_positives + @false_positives) * (@true_positives + @false_negatives) *
            (@true_negatives + @false_positives) * (@true_negatives + @false_negatives))
          if denominator.zero?
            0.0
          else
            (@true_positives * @true_negatives - @false_positives * @false_negatives) / denominator
          end
        end
        def to_s # :nodoc:
          "Matthews correlation coefficient: #{value}"
        end
      end
    end

    # Returns true if this evaluator performs better than the given
    # evaluator. Anything is better than nil; otherwise evaluators
    # are compared on their value.
    def better_than? other
      other.nil? || value > other.value
    end
  end

  # Measures accuracy as the percentage of instances
  # correctly classified out of all the available instances.
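  #
  # For example:
  #
  #   accuracy = OverallAccuracy.new
  #   accuracy.add_result(1, 1)    # correct
  #   accuracy.add_result(1, -1)   # incorrect
  #   accuracy.value               #=> 50.0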
  class OverallAccuracy < Evaluator
    attr_reader :num_correct
    def initialize
      @num_correct = 0
      @total = 0
    end
    # Given an actual and predicted class, records one more instance,
    # counting it as correct if the prediction matches the actual class.
    def add_result(actual, prediction)
      @total += 1
      @num_correct += 1 if prediction == actual
    end
    # Returns the accuracy as a percentage.
    def value
      if @total.zero?
        0.0
      else
        100.0 * @num_correct / @total
      end
    end
    def to_s
      "Overall accuracy: #{value}%"
    end
  end

  # Computes the geometric mean of performance of the model.
  # The geometric mean is the nth root of the product of the
  # accuracies for each of the n classes (accuracy being the
  # number correct divided by the number of instances
  # actually in that class).
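  #
  # For example, with one class predicted correctly once out of two
  # instances, and another correctly once out of one, the geometric
  # mean is sqrt(0.5 * 1.0), roughly 0.707:
  #
  #   gmean = GeometricMean.new
  #   gmean.add_result(:a, :a)
  #   gmean.add_result(:a, :b)
  #   gmean.add_result(:b, :b)
  #   gmean.value   #=> 0.7071067811865476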
  class GeometricMean < Evaluator
    # Structure to hold a count of instances in each class and
    # the number of those that were correctly predicted.
    Result = Struct.new(:instances, :correct)
    def initialize
      @results = {}
    end
    # Given an _actual_ class and _prediction_, keeps a separate
    # count of how many instances belong to each actual class,
    # and how many of those were predicted correctly.
    def add_result(actual, prediction)
      result = @results.fetch(actual, Result.new(0, 0))
      result.instances += 1
      result.correct += 1 if actual == prediction
      @results[actual] = result
    end
    # Computes the geometric mean, based on the counts for each class.
    def value
      if @results.empty?
        0.0
      else
        product = @results.values.inject(1) do |acc, result|
          acc * result.correct.quo(result.instances)
        end
        product ** 1.quo(@results.size)
      end
    end
    def to_s
      "Geometric mean: #{value}"
    end
  end
end