# evaluators.rb

module SvmToolkit
  # The Evaluator class provides factory methods for constructing
  # classes which evaluate the performance of a model against a
  # dataset. Different evaluators measure different kinds of
  # performance.
  #
  # Evaluators are classes which provide the methods:
  #
  # * add_result(actual, prediction), called for every instance during evaluation
  # * value, to retrieve a measure of performance
  #
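  # For illustration only (this class is not part of the library),
  # a minimal evaluator which simply counts the instances seen
  # could be written as:
  #
  #   class InstanceCount < Evaluator
  #     def initialize
  #       @count = 0
  #     end
  #     def add_result(actual, prediction)
  #       @count += 1
  #     end
  #     def value
  #       @count
  #     end
  #   end
  #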
  class Evaluator
    # Constructs an evaluation class for the given label.
    # Stores the precision performance of the model against
    # the given label. Precision is the proportion of
    # correct responses out of all the instances assigned
    # this label. A high precision means the model is
    # mostly correct when it assigns an instance to this
    # class.
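    #
    # A usage sketch (the labels here are hypothetical):
    #
    #   evaluator = Evaluator.ClassPrecision(1).new
    #   evaluator.add_result(1, 1)  # truly 1, assigned 1: correct
    #   evaluator.add_result(2, 1)  # truly 2, assigned 1: incorrect
    #   evaluator.add_result(1, 2)  # not assigned 1, so ignored
    #   evaluator.value             # => (1/2)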
    def Evaluator.ClassPrecision label
      Class.new(Evaluator) do
        # Capture the label in a closure, so that each
        # constructed class retains its own label.
        define_method(:label) { label }
        def initialize # :nodoc:
          @num_correct = 0
          @num_retrieved = 0
        end
        # Precision counts the instances which the model
        # assigned to this label, so the prediction is tested.
        def add_result(actual, prediction) # :nodoc:
          if prediction == label
            @num_retrieved += 1
            @num_correct += 1 if actual == prediction
          end
        end
        def value # :nodoc:
          if @num_retrieved.zero?
            0.0
          else
            @num_correct.quo @num_retrieved
          end
        end
        def to_s # :nodoc:
          "Precision for label #{label}: #{value}"
        end
      end
    end
    # Constructs an evaluation class for the given label.
    # Stores the recall performance of the model against the
    # given label. Recall is the proportion of correct
    # responses out of all the instances with this label.
    # A high recall means that nearly all the actual members
    # of this class are identified.
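    #
    # A usage sketch (the labels here are hypothetical):
    #
    #   evaluator = Evaluator.ClassRecall(1).new
    #   evaluator.add_result(1, 1)  # truly 1, identified: correct
    #   evaluator.add_result(1, 2)  # truly 1, missed
    #   evaluator.add_result(2, 1)  # not truly 1, so ignored
    #   evaluator.value             # => (1/2)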
    def Evaluator.ClassRecall label
      Class.new(Evaluator) do
        # Capture the label in a closure, so that each
        # constructed class retains its own label.
        define_method(:label) { label }
        def initialize # :nodoc:
          @num_correct = 0
          @num_relevant = 0
        end
        # Recall counts the instances which truly have this
        # label, so the actual class is tested.
        def add_result(actual, prediction) # :nodoc:
          if actual == label
            @num_relevant += 1
            @num_correct += 1 if actual == prediction
          end
        end
        def value # :nodoc:
          if @num_relevant.zero?
            0.0
          else
            @num_correct.quo @num_relevant
          end
        end
        def to_s # :nodoc:
          "Recall for label #{label}: #{value}"
        end
      end
    end
    # Constructs an evaluation class which computes the Matthews
    # correlation coefficient of the model against the given
    # positive label. The Matthews correlation coefficient is an
    # indicator of the agreement between the actual and predicted
    # binary classifications.
    # More information is available at:
    # http://en.wikipedia.org/wiki/Matthews_correlation_coefficient
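    #
    # The coefficient is computed from the counts of true/false
    # positives/negatives as:
    #
    #   MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
    #
    # A usage sketch (the labels here are hypothetical):
    #
    #   evaluator = Evaluator.MatthewsCorrelationCoefficient(:pos).new
    #   evaluator.add_result(:pos, :pos)  # true positive
    #   evaluator.add_result(:neg, :neg)  # true negative
    #   evaluator.add_result(:neg, :pos)  # false positive
    #   evaluator.add_result(:pos, :neg)  # false negative
    #   evaluator.value                   # => 0.0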
    def Evaluator.MatthewsCorrelationCoefficient positive_label
      Class.new(Evaluator) do
        # Capture the positive label in a closure, so that each
        # constructed class retains its own label.
        define_method(:positive_label) { positive_label }
        def initialize # :nodoc:
          @true_positives = 0
          @true_negatives = 0
          @false_positives = 0
          @false_negatives = 0
        end
        # Keeps a separate count of TP, FP, TN, FN.
        def add_result(actual, prediction) # :nodoc:
          case [actual == positive_label, prediction == positive_label]
          when [true, true]
            @true_positives += 1
          when [true, false]
            @false_negatives += 1
          when [false, false]
            @true_negatives += 1
          when [false, true]
            @false_positives += 1
          end
        end
        # Returns the Matthews correlation coefficient,
        # or 0.0 when its denominator is zero.
        def value # :nodoc:
          denominator = Math.sqrt(
            (@true_positives + @false_positives) * (@true_positives + @false_negatives) *
            (@true_negatives + @false_positives) * (@true_negatives + @false_negatives))
          if denominator.zero?
            0.0
          else
            (@true_positives * @true_negatives - @false_positives * @false_negatives) / denominator
          end
        end
        def to_s # :nodoc:
          "Matthews correlation coefficient: #{value}"
        end
      end
    end
    # This evaluator is better than the given evaluator if the
    # given evaluator is nil, or if this evaluator's value is
    # greater.
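    #
    # For example, a hypothetical selection loop over several
    # trained evaluators:
    #
    #   best = nil
    #   evaluators.each do |evaluator|
    #     best = evaluator if evaluator.better_than?(best)
    #   end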
    def better_than? other
      other.nil? or self.value > other.value
    end
  end
  # Measures accuracy as the percentage of instances
  # correctly classified out of all the available instances.
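  #
  # A usage sketch (the labels here are hypothetical):
  #
  #   accuracy = OverallAccuracy.new
  #   accuracy.add_result(1, 1)  # correct
  #   accuracy.add_result(2, 1)  # incorrect
  #   accuracy.value             # => 50.0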
  class OverallAccuracy < Evaluator
    attr_reader :num_correct
    def initialize
      @num_correct = 0
      @total = 0
    end
    # Given an actual and predicted class, counts one more
    # instance seen, and one more correct response if the
    # prediction matches the actual class.
    def add_result(actual, prediction)
      @total += 1
      @num_correct += 1 if prediction == actual
    end
    # Returns the accuracy as a percentage, or 0.0 if no
    # results have been added.
    def value
      if @total.zero?
        0.0
      else
        100.0 * @num_correct / @total
      end
    end
    def to_s
      "Overall accuracy: #{value}%"
    end
  end
  # Computes the geometric mean of the performance of the model.
  # The geometric mean is the nth root of the product of the
  # accuracies for each of the n classes (accuracy being the
  # number correct divided by the number of instances
  # actually in that class).
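  #
  # A usage sketch (the labels here are hypothetical): class 1 scores
  # 1/2, class 2 scores 1/1, so the geometric mean is the square root
  # of 1/2, about 0.707.
  #
  #   gmean = GeometricMean.new
  #   gmean.add_result(1, 1)  # class 1: correct
  #   gmean.add_result(1, 2)  # class 1: incorrect
  #   gmean.add_result(2, 2)  # class 2: correct
  #   gmean.value             # => 0.7071...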
  class GeometricMean < Evaluator
    # Structure to hold a count of the instances in each class and
    # the number of those which were predicted correctly.
    Result = Struct.new(:instances, :correct)
    def initialize
      @results = {}
    end
    # Given an _actual_ class and _prediction_,
    # keeps a separate count of how many instances there are of the
    # actual class, and how many of those were predicted correctly.
    def add_result(actual, prediction)
      result = @results.fetch(actual, Result.new(0, 0))
      result.instances += 1
      result.correct += 1 if actual == prediction
      @results[actual] = result
    end
    # Computes the geometric mean, based on the counts for each class.
    def value
      if @results.empty?
        0.0
      else
        product = @results.values.inject(1) do |acc, result|
          acc * result.correct.quo(result.instances)
        end
        product ** (1.quo(@results.size))
      end
    end
    def to_s
      "Geometric mean: #{value}"
    end
  end
end