# evaluators.rb

module SvmToolkit
  # The Evaluator class provides factory methods for constructing
  # classes which evaluate the performance of a model against a
  # dataset. Different evaluators measure different kinds of
  # performance.
  #
  # Evaluators are classes which provide the methods:
  #
  # * add_result(actual, prediction), called for every instance during evaluation
  # * value, to retrieve a measure of performance
  #
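  # For illustration only (this class is not part of the library),
  # a minimal evaluator which simply counts the instances seen
  # could be written as:
  #
  #   class InstanceCount < Evaluator
  #     def initialize
  #       @count = 0
  #     end
  #     def add_result(actual, prediction)
  #       @count += 1
  #     end
  #     def value
  #       @count
  #     end
  #   end
  #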
  class Evaluator
    # Constructs an evaluation class for the given label.
    # Stores the precision performance of the model against
    # the given label. Precision is the proportion of
    # correct responses out of all the instances assigned
    # this label. A high precision means the model is
    # mostly correct when it assigns an instance to this
    # class.
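    #
    # A usage sketch (the labels here are hypothetical):
    #
    #   evaluator = Evaluator.ClassPrecision(1).new
    #   evaluator.add_result(1, 1)  # truly 1, assigned 1: correct
    #   evaluator.add_result(2, 1)  # truly 2, assigned 1: incorrect
    #   evaluator.add_result(1, 2)  # not assigned 1, so ignored
    #   evaluator.value             # => (1/2)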
    def Evaluator.ClassPrecision label
      Class.new(Evaluator) do
        # Capture the label in a closure, so that each
        # constructed class retains its own label.
        define_method(:label) { label }
        def initialize # :nodoc:
          @num_correct = 0
          @num_retrieved = 0
        end
        # Precision counts the instances which the model
        # assigned to this label, so the prediction is tested.
        def add_result(actual, prediction) # :nodoc:
          if prediction == label
            @num_retrieved += 1
            @num_correct += 1 if actual == prediction
          end
        end
        def value # :nodoc:
          if @num_retrieved.zero?
            0.0
          else
            @num_correct.quo @num_retrieved
          end
        end
        def to_s # :nodoc:
          "Precision for label #{label}: #{value}"
        end
      end
    end
    # Constructs an evaluation class for the given label.
    # Stores the recall performance of the model against the
    # given label. Recall is the proportion of correct
    # responses out of all the instances with this label.
    # A high recall means that nearly all the actual members
    # of this class are identified.
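    #
    # A usage sketch (the labels here are hypothetical):
    #
    #   evaluator = Evaluator.ClassRecall(1).new
    #   evaluator.add_result(1, 1)  # truly 1, identified: correct
    #   evaluator.add_result(1, 2)  # truly 1, missed
    #   evaluator.add_result(2, 1)  # not truly 1, so ignored
    #   evaluator.value             # => (1/2)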
    def Evaluator.ClassRecall label
      Class.new(Evaluator) do
        # Capture the label in a closure, so that each
        # constructed class retains its own label.
        define_method(:label) { label }
        def initialize # :nodoc:
          @num_correct = 0
          @num_relevant = 0
        end
        # Recall counts the instances which truly have this
        # label, so the actual class is tested.
        def add_result(actual, prediction) # :nodoc:
          if actual == label
            @num_relevant += 1
            @num_correct += 1 if actual == prediction
          end
        end
        def value # :nodoc:
          if @num_relevant.zero?
            0.0
          else
            @num_correct.quo @num_relevant
          end
        end
        def to_s # :nodoc:
          "Recall for label #{label}: #{value}"
        end
      end
    end
    # Constructs an evaluation class which computes the Matthews
    # correlation coefficient of the model against the given
    # positive label. The Matthews correlation coefficient is an
    # indicator of the agreement between the actual and predicted
    # binary classifications.
    # More information is available at:
    # http://en.wikipedia.org/wiki/Matthews_correlation_coefficient
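    #
    # The coefficient is computed from the counts of true/false
    # positives/negatives as:
    #
    #   MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
    #
    # A usage sketch (the labels here are hypothetical):
    #
    #   evaluator = Evaluator.MatthewsCorrelationCoefficient(:pos).new
    #   evaluator.add_result(:pos, :pos)  # true positive
    #   evaluator.add_result(:neg, :neg)  # true negative
    #   evaluator.add_result(:neg, :pos)  # false positive
    #   evaluator.add_result(:pos, :neg)  # false negative
    #   evaluator.value                   # => 0.0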
    def Evaluator.MatthewsCorrelationCoefficient positive_label
      Class.new(Evaluator) do
        # Capture the positive label in a closure, so that each
        # constructed class retains its own label.
        define_method(:positive_label) { positive_label }
        def initialize # :nodoc:
          @true_positives = 0
          @true_negatives = 0
          @false_positives = 0
          @false_negatives = 0
        end
        # Keeps a separate count of TP, FP, TN, FN.
        def add_result(actual, prediction) # :nodoc:
          case [actual == positive_label, prediction == positive_label]
          when [true, true]
            @true_positives += 1
          when [true, false]
            @false_negatives += 1
          when [false, false]
            @true_negatives += 1
          when [false, true]
            @false_positives += 1
          end
        end
        # Returns the Matthews correlation coefficient,
        # or 0.0 when its denominator is zero.
        def value # :nodoc:
          denominator = Math.sqrt(
            (@true_positives + @false_positives) * (@true_positives + @false_negatives) *
            (@true_negatives + @false_positives) * (@true_negatives + @false_negatives))
          if denominator.zero?
            0.0
          else
            (@true_positives * @true_negatives - @false_positives * @false_negatives) / denominator
          end
        end
        def to_s # :nodoc:
          "Matthews correlation coefficient: #{value}"
        end
      end
    end
    # This evaluator is better than the given evaluator if the
    # given evaluator is nil, or if this evaluator's value is
    # greater.
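    #
    # For example, a hypothetical selection loop over several
    # trained evaluators:
    #
    #   best = nil
    #   evaluators.each do |evaluator|
    #     best = evaluator if evaluator.better_than?(best)
    #   end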
    def better_than? other
      other.nil? or self.value > other.value
    end
  end
  # Measures accuracy as the percentage of instances
  # correctly classified out of all the available instances.
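  #
  # A usage sketch (the labels here are hypothetical):
  #
  #   accuracy = OverallAccuracy.new
  #   accuracy.add_result(1, 1)  # correct
  #   accuracy.add_result(2, 1)  # incorrect
  #   accuracy.value             # => 50.0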
  class OverallAccuracy < Evaluator
    attr_reader :num_correct
    def initialize
      @num_correct = 0
      @total = 0
    end
    # Given an actual and predicted class, counts one more
    # instance seen, and one more correct response if the
    # prediction matches the actual class.
    def add_result(actual, prediction)
      @total += 1
      @num_correct += 1 if prediction == actual
    end
    # Returns the accuracy as a percentage, or 0.0 if no
    # results have been added.
    def value
      if @total.zero?
        0.0
      else
        100.0 * @num_correct / @total
      end
    end
    def to_s
      "Overall accuracy: #{value}%"
    end
  end
  # Computes the geometric mean of the performance of the model.
  # The geometric mean is the nth root of the product of the
  # accuracies for each of the n classes (accuracy being the
  # number correct divided by the number of instances
  # actually in that class).
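  #
  # A usage sketch (the labels here are hypothetical): class 1 scores
  # 1/2, class 2 scores 1/1, so the geometric mean is the square root
  # of 1/2, about 0.707.
  #
  #   gmean = GeometricMean.new
  #   gmean.add_result(1, 1)  # class 1: correct
  #   gmean.add_result(1, 2)  # class 1: incorrect
  #   gmean.add_result(2, 2)  # class 2: correct
  #   gmean.value             # => 0.7071...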
  class GeometricMean < Evaluator
    # Structure to hold a count of the instances in each class and
    # the number of those which were predicted correctly.
    Result = Struct.new(:instances, :correct)
    def initialize
      @results = {}
    end
    # Given an _actual_ class and _prediction_,
    # keeps a separate count of how many instances there are of the
    # actual class, and how many of those were predicted correctly.
    def add_result(actual, prediction)
      result = @results.fetch(actual, Result.new(0, 0))
      result.instances += 1
      result.correct += 1 if actual == prediction
      @results[actual] = result
    end
    # Computes the geometric mean, based on the counts for each class.
    def value
      if @results.empty?
        0.0
      else
        product = @results.values.inject(1) do |acc, result|
          acc * result.correct.quo(result.instances)
        end
        product ** (1.quo(@results.size))
      end
    end
    def to_s
      "Geometric mean: #{value}"
    end
  end
end