# example-3.rb
#
# Example using svm_toolkit to classify letters,
# using UCI's letter-recognition example.
#
# This example illustrates a grid search to find
# the best model, using cross-validation on the number
# of errors.
#
# (The grid-search technique is now built in to the library.)
#
# Written by Peter Lane, 2011.
require "svm-toolkit"
include SvmToolkit
  13. # For given problem and combination of cost/gamma,
  14. # train and return a model using RBF kernel.
  15. def train_rbf_model(problem, cost, gamma)
  16. Svm.svm_train(problem, Parameter.new(
  17. :svm_type => Parameter::C_SVC,
  18. :kernel_type => Parameter::RBF,
  19. :cost => cost,
  20. :gamma => gamma
  21. ))
  22. end
  23. # load letter-recognition data
  24. # -- given list of strings, first is label, rest are numbers of features
  25. def make_instance strings
  26. [strings.first.codepoints.first - 65, strings[1..-1].collect{|s| s.to_i / 15.0}]
  27. end
  28. def read_data filename
  29. data = []
  30. IO.foreach(filename) do |line|
  31. data << make_instance(line.split(","))
  32. end
  33. return data
  34. end
  35. def make_problem instances
  36. Problem.from_array(
  37. instances.collect{|i| i[1]},
  38. instances.collect{|i| i[0]}
  39. )
  40. end
  41. Dataset = read_data "letter-recognition.data"
  42. puts "Read #{Dataset.size} items"
  43. TrainingData = make_problem Dataset[0...2000]
  44. CrossSet = make_problem Dataset[2000...3000]
  45. TestSet = make_problem Dataset[3000..-1]
  46. Costs = [-5, -3, -1, 0, 1, 3, 5, 8, 10, 13, 15].collect {|n| 2**n}
  47. Gammas = [-15, -12, -8, -5, -3, -1, 1, 3, 5, 7, 9].collect {|n| 2**n}
  48. best_model = nil
  49. lowest_error = nil
  50. Costs.each do |cost|
  51. Gammas.each do |gamma|
  52. model = train_rbf_model(TrainingData, cost, gamma)
  53. result = model.evaluate_dataset(CrossSet)
  54. if result.better_than? lowest_error
  55. best_model = model
  56. lowest_error = result
  57. end
  58. puts "Testing: C = #{cost} G = #{gamma} -> #{result}"
  59. end
  60. end
  61. puts "Test set errors: #{best_model.evaluate_dataset(TestSet)}"