peterlane
/
svm_toolkit


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
							# This file is part of svm_toolkit.
#
# Author::    Peter Lane
# Copyright:: Copyright 2011-13, Peter Lane.
# License::   MIT Licence
#

# The Evaluator module provides some classes and methods to construct 
# classes for evaluating the performance of a model against a dataset.  
# Different evaluators measure different kinds of performance.  
#
# Evaluators are classes which provide the methods:
# * add_result(actual, prediction), called for every instance during evaluation
# * value, to retrieve a measure of performance
# * better_than?(evaluator), to compare performance between two evaluators
module Evaluator

  # Measures accuracy as the percentage of instances 
  # correctly classified out of all the available instances.
  class OverallAccuracy 
    attr_reader :num_correct

    def initialize
      @num_correct = 0
      @total = 0
    end

    def add_result(actual, prediction)
      @total += 1
      @num_correct += 1 if prediction == actual
    end

    # This object is better than given object, if the 
    # given object is an instance of nil, or the accuracy 
    # is better
    def better_than? other
      other.nil? or self.num_correct > other.num_correct
    end

    # Return the accuracy as a percentage.
    def value
      if @total.zero? 
        0.0
      else
        100.0 * @num_correct / @total
      end
    end

    def to_s
      "Overall accuracy: #{value}%"
    end
  end

  # Computes the geometric mean of performance of the model.
  # The geometric mean is the nth root of the product of the 
  # accuracies for each of the n classes (accuracy being 
  # number correct divided by the number of instances 
  # actually in that class).
  class GeometricMean
    Result = Struct.new(:instances, :correct)

    def initialize
      @results = {}
    end

    def add_result(actual, prediction)
      result = @results.fetch(prediction, Result.new(0, 0))
      result.instances += 1
      result.correct += 1 if actual == prediction
      @results[prediction] = result
    end

    def value
      if @results.empty?
        0.0
      else
        @results.values.inject(1){|a,b| a*b.correct.quo(b.instances)} ** (1.quo(@results.size))
      end
    end

    def better_than? other
      other.nil? or self.value < other.value
    end

    def to_s
      "Geometric mean: #{value}"
    end
  end

  # Constructs an evaluation class for the given label.
  # Stores the precision performance of the model against 
  # the given label.  Precision is the proportion of 
  # correct responses out of all the instances assigned 
  # this label.  A high precision means the model is 
  # mostly correctly when it assigns an instance into this 
  # class.
  def Evaluator.ClassPrecision label
    Class.new do
      @@label = label

      def initialize
        @num_correct = 0
        @num_retrieved = 0
      end

      def add_result(actual, prediction)
        if actual == @@label
          @num_retrieved += 1
          @num_correct += 1 if actual == prediction
        end
      end

      def value
        if @num_retrieved.zero?
          0.0
        else
          @num_correct.quo @num_retrieved
        end
      end

      def better_than? other
        other.nil? or self.value < other.value
      end

      def to_s
        "Precision for label #{@@label}: #{value}"
      end
    end
  end

  # Constructs an evaluation class for the given label.
  # Stores the recall performance of the model against the 
  # given label.  Recall is the proportion of correct 
  # responses out of all the instances with this label.
  # A high recall means that nearly all the actual members 
  # of this class are identified.
  def Evaluator.ClassRecall label
    Class.new do
      @@label = label

      def initialize
        @num_correct = 0
        @num_predicted = 0
      end

      def add_result(actual, prediction)
        if prediction == @@label
          @num_predicted += 1
          @num_correct += 1 if actual == prediction
        end
      end

      def value
        if @num_predicted.zero?
          0.0
        else
          @num_correct.quo @num_predicted
        end
      end

      def better_than? other
        other.nil? or self.value < other.value
      end

      def to_s
        "Recall for label #{@@label}: #{value}"
      end
    end
  end

  # Computes the Matthews correlation coefficient of the model
  # The Matthews correlation coefficient is an indicator for
  # the similarity between the actual and predicted binary
  # classification.
  # More information is available at:
  # http://en.wikipedia.org/wiki/Matthews_correlation_coefficient
  def Evaluator.MatthewsCorrelationCoefficient positive_label
    Class.new do
      @@positive_label = positive_label

      def initialize
        @true_positives = 0
        @true_negatives = 0
        @false_positives = 0
        @false_negatives = 0
      end

      def add_result(actual, prediction)
        case [actual == @@positive_label, prediction == @@positive_label]
          when [true, true]
            @true_positives += 1
          when [true, false]
            @false_negatives += 1
          when [false, false]
            @true_negatives += 1
          when [false, true]
            @false_positives += 1
        end
      end

      def value
        (@true_positives * @true_negatives - @false_positives * @false_negatives) /
            Math.sqrt(
                (@true_positives + @false_positives) * (@true_positives + @false_negatives) *
                    (@true_negatives + @false_positives) * (@true_negatives + @false_negatives))
      end

      def better_than? other
        other.nil? or self.value < other.value
      end

      def to_s
        "Matthews correlation coefficient: #{value}"
      end
    end
  end
end