# example-3.rb
#
# Example using svm_toolkit to classify letters,
# using UCI's letter-recognition example.
#
# This example illustrates a grid search to find
# the best model, using cross-validation on the number
# of errors.
#
# (The grid-search technique is now built in to the library.)
#
# Written by Peter Lane, 2011.
require "svm-toolkit"
include SvmToolkit
  13. # For given problem and combination of cost/gamma,
  14. # train and return a model using RBF kernel.
  15. def train_rbf_model(problem, cost, gamma)
  16. Svm.svm_train(problem, Parameter.new(
  17. :svm_type => Parameter::C_SVC,
  18. :kernel_type => Parameter::RBF,
  19. :cost => cost,
  20. :gamma => gamma
  21. ))
  22. end
  23. # load letter-recognition data
  24. # -- given list of strings, first is label, rest are numbers of features
  25. def make_instance strings
  26. [strings.first.codepoints.first - 65, strings[1..-1].collect{|s| s.to_i / 15.0}]
  27. end
  28. def read_data filename
  29. data = []
  30. IO.foreach(filename) do |line|
  31. data << make_instance(line.split(","))
  32. end
  33. return data
  34. end
  35. def make_problem instances
  36. Problem.from_array(
  37. instances.collect{|i| i[1]},
  38. instances.collect{|i| i[0]}
  39. )
  40. end
  41. Dataset = read_data "letter-recognition.data"
  42. puts "Read #{Dataset.size} items"
  43. TrainingData = make_problem Dataset[0...2000]
  44. CrossSet = make_problem Dataset[2000...3000]
  45. TestSet = make_problem Dataset[3000..-1]
  46. Costs = [-5, -3, -1, 0, 1, 3, 5, 8, 10, 13, 15].collect {|n| 2**n}
  47. Gammas = [-15, -12, -8, -5, -3, -1, 1, 3, 5, 7, 9].collect {|n| 2**n}
  48. best_model = nil
  49. lowest_error = nil
  50. Costs.each do |cost|
  51. Gammas.each do |gamma|
  52. model = train_rbf_model(TrainingData, cost, gamma)
  53. result = model.evaluate_dataset(CrossSet)
  54. if result.better_than? lowest_error
  55. best_model = model
  56. lowest_error = result
  57. end
  58. puts "Testing: C = #{cost} G = #{gamma} -> #{result}"
  59. end
  60. end
  61. puts "Test set errors: #{best_model.evaluate_dataset(TestSet)}"