example-5.rb 1.4 KB

# Example using svm_toolkit to classify letters,
# using UCI's letter-recognition example.
#
# This example illustrates use of the built-in grid search to find
# the best model, using cross-validation on the number
# of errors. Displays a contour map of the search results.
#
# Written by Peter Lane, 2011.

require "svm-toolkit"
include SvmToolkit

# Load letter-recognition data.
# Given a list of strings, the first is the label, the rest are the feature values.
def make_instance strings
  [strings.first.codepoints.first - 65, strings[1..-1].collect { |s| s.to_i / 15.0 }]
end

def read_data filename
  data = []
  IO.foreach(filename) do |line|
    data << make_instance(line.split(","))
  end
  data
end

# Build an SVM problem from (label, features) pairs.
def make_problem instances
  Problem.from_array(
    instances.collect { |i| i[1] },   # feature vectors
    instances.collect { |i| i[0] }    # labels
  )
end

Dataset = read_data "letter-recognition.data"
puts "Read #{Dataset.size} items"

TrainingData = make_problem Dataset[0...200]      # for speed, only train on 200 instances
CrossSet     = make_problem Dataset[2000...3000]  # cross-validation set for the grid search
TestSet      = make_problem Dataset[3000..-1]     # held-out test set

# Grid of cost and gamma values to search, as powers of two.
Costs  = [-5, -3, -1, 0, 1, 3, 5, 8, 10, 13, 15].collect { |n| 2**n }
Gammas = [-15, -12, -8, -5, -3, -1, 1, 3, 5, 7, 9].collect { |n| 2**n }

best_model = Svm.cross_validation_search(TrainingData, CrossSet, Costs, Gammas, :show_plot => true)
puts "Test set performance: #{best_model.evaluate_dataset(TestSet, :evaluator => Evaluator::GeometricMean)}"
best_model.save "best.dat"
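
# ----------------------------------------------------------------------
# Note: a minimal sketch of the expected input, not part of the original
# example. The UCI letter-recognition file is CSV with the capital letter
# first, followed by 16 integer attributes in the range 0..15, which is why
# make_instance divides by 15.0. An illustrative line (values shown here
# are hypothetical, not quoted from the dataset):
#
#   T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
#
# make_instance maps the letter to 0..25 and scales each attribute to [0, 1]:
#
#   label, features = make_instance("T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8".split(","))
#   label           #=> 19   ("T" has codepoint 84; 84 - 65 = 19)
#   features.first  #=> 0.1333...  (2 / 15.0)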