123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- // Copyright 2008 Rarefied Technologies, Inc.
- // Distributed under the GPL v2 please see
- // LICENSE file for more information.
- #include "SVMUtil.h"
- #include <string>
- #include <fstream>
- #include <algorithm>
- #include <cfloat>
- #include <cmath>
- #include <iostream>
- #include "parametersearch.h"
- #include "parameterresult.h"
- #include "../thirdparty/libsvm/svm.h"
- #include "../thirdparty/boost/serialization/set.hpp"
- void myfunction(int);
- //#define SCALED_MAX 1
- //#define SCALED_MIN 0 //assume features > 0
- //#define SCALED_MIN -1 // if features can be < 0
- //svm_problem ParseTrainingFile(string strFilename);
- #define MAX_LINE_LENGTH 1024
- SVMUtil::SVMUtil()
- {
- m_pProblem = NULL;
- m_pScaleFactors = NULL;
- m_nParams = 0;
- m_pModel = NULL;
- }
- SVMUtil::~SVMUtil()
- {
- // problem destroyed when the model is
- //delete m_pProblem;
- //m_pProblem = NULL;
- if(m_pModel)
- svm_destroy_model(m_pModel);
- }
- // borrowed from read_problem in svm-train.c
- svm_problem* SVMUtil::ParseTrainingFile(std::string strFilename)
- {
- m_pProblem = new svm_problem;
- svm_node *x_space;
- svm_parameter param;
-
- const char* filename = strFilename.c_str();
-
- int elements, i, j;
- FILE *fp = fopen(filename,"r");
-
- if(fp == NULL)
- {
- fprintf(stderr,"can't open input file %s\n",filename);
- exit(1);
- }
- m_pProblem->l = 0;
- elements = 0;
- while(1)
- {
- int c = fgetc(fp);
- switch(c)
- {
- case '\n':
- ++m_pProblem->l;
- // fall through,
- // count the '-1' element
- case ':':
- ++elements;
- break;
- case EOF:
- goto out;
- default:
- ;
- }
- }
- out:
- rewind(fp);
- m_pProblem->y = Malloc(double,m_pProblem->l);
- m_pProblem->x = Malloc(struct svm_node *,m_pProblem->l);
-
- int nParamCountGuess = elements / m_pProblem->l;
- m_nParams = 0;
- for(i=0;i<m_pProblem->l;i++)
- {
- double label;
- x_space = Malloc(struct svm_node, nParamCountGuess+1);
- m_pProblem->x[i] = x_space;
- fscanf(fp,"%lf",&label);
- m_pProblem->y[i] = label;
-
- j=0;
-
- while(1)
- {
- int c;
- do {
- c = getc(fp);
- if(c=='\n') goto out2;
- } while(isspace(c));
- ungetc(c,fp);
-
- int nIndex;
- double dValue;
-
- if (fscanf(fp,"%d:%lf", &nIndex, &dValue) < 2)
- {
- fprintf(stderr,"Wrong input format at line %d\n", i+1);
- exit(1);
- }
- if(dValue!=0)
- {
- x_space[j].index = nIndex;
- x_space[j].value = dValue;
-
- ++j;
- }
- }
- out2:
- if(j>=1 && x_space[j-1].index > m_nParams)
- m_nParams = x_space[j-1].index;
- x_space[j++].index = -1;
- }
- if(param.gamma == 0)
- param.gamma = 1.0/m_nParams;
- if(param.kernel_type == PRECOMPUTED)
- for(i=0;i<m_pProblem->l;i++)
- {
- if (m_pProblem->x[i][0].index != 0)
- {
- fprintf(stderr,"Wrong input format: first column must be 0:sample_serial_number\n");
- exit(1);
- }
- if ((int)m_pProblem->x[i][0].value <= 0 || (int)m_pProblem->x[i][0].value > m_nParams)
- {
- fprintf(stderr,"Wrong input format: sample_serial_number out of range\n");
- exit(1);
- }
- }
- fclose(fp);
-
- ScaleTrainingData();
- SaveScaleFactors(strFilename + ".sf");
-
- return m_pProblem;
- }
- bool SVMUtil::ScaleTrainingData()
- {
- if(!m_pProblem)
- {
- assert(0);
- return false;
- }
-
- if(!DetermineScaleFactors())
- return false;
-
- svm_node* pNode = NULL;
-
- for(int i=0; i < m_pProblem->l; i++)
- {
- pNode = m_pProblem->x[i];
- ScaleNode(pNode);
- }
-
- return true;
- }
- bool SVMUtil::DetermineScaleFactors()
- {
- if(!m_pProblem)
- return false;
-
- svm_node* pNode = NULL;
- double* pMaxValue = Malloc(double, m_nParams);
- m_pScaleFactors = Malloc(double, m_nParams);
-
- for(int j=0; j < m_nParams; j++)
- {
- pMaxValue[j] = 0; // assumes values should be scaled between 0 and 1
- }
-
- for(int i=0; i < m_pProblem->l; i++)
- {
- pNode = m_pProblem->x[i];
- int nindex = 0;
- int j=0;
- while(pNode)
- {
- nindex = pNode[j].index;
- if(nindex==-1)
- break;
-
- pMaxValue[nindex-1] = max(pMaxValue[nindex-1], pNode[j].value); // assume values are positive
- j++;
- }
- }
-
- for(int j=0; j < m_nParams; j++)
- {
- if(pMaxValue[j] > 0)
- m_pScaleFactors[j] = (double)1./pMaxValue[j];
- else
- m_pScaleFactors[j] = 1;
- }
- return true;
- }
- bool SVMUtil::ScaleNode(svm_node* pNode)
- {
- if(!pNode)
- {
- cerr << "error scaling" << endl;
- assert(0);
- return false;
- }
- if(!m_pScaleFactors)
- {
- if(m_pProblem)
- DetermineScaleFactors();
- else
- {
- assert(0);
- return false;
- }
- }
-
- int i = 0;
-
- while(pNode[i].index != -1)
- {
- pNode[i].value *= m_pScaleFactors[pNode[i].index-1];
- i++;
- }
- return true;
- }
- bool SVMUtil::ParameterSearch(svm_parameter* pSvmParam, string strFilename)
- {
- if(!m_pProblem || !pSvmParam)
- return false;
-
- //struct sigaction sa;
- //sa.sa_handler = &myfunction;
- //sigaction(SIGINT, &sa, NULL);
-
- CParameterSearch* paramSearch = new CParameterSearch(m_pProblem, pSvmParam, strFilename);
-
- //SaveSearch(paramSearch);
- delete paramSearch;
-
- return true;
- }
- bool SVMUtil::SaveSearch(const CParameterSearch* p_Search)
- {
- std::ofstream ofs("searchResults.txt");
- boost::archive::text_oarchive oa(ofs);
- oa << *p_Search;
- return true;
- }
-
- void myfunction(int number)
- {
- int ten;
- int five = number;
- ten = five + five;
-
- }
- bool SVMUtil::Load(string filename)
- {
- if(m_pModel)
- svm_destroy_model(m_pModel);
-
- string modelname = filename + ".mod";
- string scalename = filename + ".sf";
- m_pModel = svm_load_model(modelname.c_str());
- LoadScaleFactors(scalename);
-
- return true;
- }
- bool SVMUtil::Save(string filename)
- {
- string modelname = filename + ".mod";
- string scalename = filename + ".sf";
- svm_save_model(modelname.c_str(), m_pModel);
- SaveScaleFactors(scalename);
-
- return true;
- }
- void SVMUtil::SaveScaleFactors(string filename)
- {
- ofstream fout;
- fout.open(filename.c_str());
- fout << m_nParams << endl;
- for(int i=0; i<m_nParams; i++)
- {
- fout << m_pScaleFactors[i] << endl;
- }
- fout.close();
- }
- bool SVMUtil::LoadScaleFactors(string filename)
- {
- ifstream fin;
- fin.open(filename.c_str());
- fin >> m_nParams;
-
- if(m_pScaleFactors)
- delete m_pScaleFactors;
- m_pScaleFactors = new double[m_nParams];
-
- for(int i=0; i<m_nParams; i++)
- {
- fin >> m_pScaleFactors[i];
- }
- return true;
- }
- bool SVMUtil::CrossValidate(int nFolds, svm_parameter* pParam)
- {
- double* target = new double[m_pProblem->l];
- if(nFolds>0)
- svm_cross_validation(m_pProblem, pParam, nFolds, target);
- else
- {
- if(!m_pModel)
- m_pModel = svm_train(m_pProblem, pParam);
- for(int i=0; i<m_pProblem->l; i++)
- {
- target[i] = svm_predict(m_pModel, m_pProblem->x[i]);
- }
- }
-
- float fError = 0;
- float fWrong = 0;
- for(int i=0; i<m_pProblem->l; i++)
- {
- fError += abs(m_pProblem->y[i] - target[i]) ;
- if( m_pProblem->y[i] >= 0.5 && target[i] < 0.5)
- fWrong++;
- else if( m_pProblem->y[i] < 0.5 && target[i] >= 0.5)
- fWrong++;
-
- }
- fError = (float)fError/m_pProblem->l;
- fWrong = (float) fWrong/m_pProblem->l;
-
- float fStdDev = 0;
- for(int i=0; i<m_pProblem->l; i++)
- {
- fStdDev += pow(fError - abs(m_pProblem->y[i] - target[i]), 2) ;
- }
- fStdDev = pow(fStdDev, (float)0.5) / m_pProblem->l;
-
- std::cout << "Percent wrong: " << fWrong << " Avg Error: " << fError << " Std Dev: " << fStdDev << std::endl;
- return true;
- }
|