// LTI-Lib — latest version v1.9, last update 10 Apr 2010
00001 /* 00002 * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 00003 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany 00004 * 00005 * This file is part of the LTI-Computer Vision Library (LTI-Lib) 00006 * 00007 * The LTI-Lib is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public License (LGPL) 00009 * as published by the Free Software Foundation; either version 2.1 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * The LTI-Lib is distributed in the hope that it will be 00013 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00014 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with the LTI-Lib; see the file LICENSE. If 00019 * not, write to the Free Software Foundation, Inc., 59 Temple Place - 00020 * Suite 330, Boston, MA 02111-1307, USA. 00021 */ 00022 00023 00024 /*---------------------------------------------------------------- 00025 * project ....: LTI Digital Image/Signal Processing Library 00026 * file .......: ltiSvm.h 00027 * authors ....: Jochen Wickel 00028 * organization: LTI, RWTH Aachen 00029 * creation ...: 30.10.2001 00030 * revisions ..: $Id: ltiSVM.h,v 1.7 2006/02/07 18:24:45 ltilib Exp $ 00031 */ 00032 00033 #ifndef _LTI_SVM_H_ 00034 #define _LTI_SVM_H_ 00035 00036 #include "ltiSupervisedInstanceClassifier.h" 00037 #include "ltiObjectFactory.h" 00038 #include "ltiKernelFunctor.h" 00039 #include "ltiLinearKernel.h" 00040 #include "ltiClassName.h" 00041 00042 namespace lti { 00043 /** 00044 * Implements a support vector machine (SVM). The SVM is trained 00045 * using the SMO algorithm (sequential minimal optimization) by 00046 * J.C. Platt (presented at NIPS 1999). 
00047 * 00048 * The kernel function is determined by an %object of type 00049 * kernelFunctor<double>. svm contains an inner class called 00050 * linearKernel which is the default. If your application requires another 00051 * kernel function, you have to find one in another place. 00052 */ 00053 class svm : public supervisedInstanceClassifier { 00054 public: 00055 00056 /** 00057 * the parameters for the class svm 00058 */ 00059 class parameters : public supervisedInstanceClassifier::parameters { 00060 public: 00061 00062 /** 00063 * default constructor 00064 */ 00065 parameters(); 00066 00067 /** 00068 * copy constructor 00069 * @param other the parameters object to be copied 00070 */ 00071 parameters(const parameters& other); 00072 00073 /** 00074 * destructor 00075 */ 00076 virtual ~parameters(); 00077 00078 /** 00079 * returns name of this type. 00080 */ 00081 const char* getTypeName() const; 00082 00083 /** 00084 * copy the contents of a parameters object 00085 * @param other the parameters object to be copied 00086 * @return a reference to this parameters object 00087 */ 00088 parameters& copy(const parameters& other); 00089 00090 /** 00091 * copy the contents of a parameters object 00092 * @param other the parameters object to be copied 00093 * @return a reference to this parameters object 00094 */ 00095 parameters& operator=(const parameters& other); 00096 00097 00098 /** 00099 * returns a pointer to a clone of the parameters 00100 */ 00101 virtual classifier::parameters* clone() const; 00102 00103 /** 00104 * Sets a new kernel function. A copy of the kernel will be 00105 * done (so it is useless to change the parameters of the given 00106 * kernel instance, because the internal kernel will never 00107 * notice the changes done to its "parent"). 00108 */ 00109 void setKernel(const kernelFunctor<double>& k); 00110 00111 /** 00112 * Sets a new kernel function. 
The kernel which is passed here as an 00113 * argument must have been allocated with new; it must not be 00114 * a local variable. On destruction of the parameters object, 00115 * the kernel will be deleted, i.e. this parameters instance will be 00116 * responsible for the memory managment of the object. 00117 */ 00118 void attachKernel(kernelFunctor<double>* k); 00119 00120 /** 00121 * Sets a new kernel function. The kernel which is passed here as an 00122 * argument is not deleted by the parameters object, the caller 00123 * must ensure that there are no memory leaks. 00124 */ 00125 void useExternalKernel(kernelFunctor<double>* k); 00126 00127 /** 00128 * create a new kernel function, using its class name. A pointer to the 00129 * kernel function is returned, so that the caller may modify 00130 * the kernel's parameters. 00131 */ 00132 kernelFunctor<double>* createKernel(const std::string& name) const; 00133 00134 /** 00135 * write the parameters in the given ioHandler 00136 * @param handler the ioHandler to be used 00137 * @param complete if true (the default) the enclosing begin/end will 00138 * be also written, otherwise only the data block will be written. 00139 * @return true if write was successful 00140 */ 00141 virtual bool write(ioHandler& handler,const bool complete=true) const; 00142 00143 /** 00144 * read the parameters from the given ioHandler 00145 * @param handler the ioHandler to be used 00146 * @param complete if true (the default) the enclosing begin/end will 00147 * be also written, otherwise only the data block will be written. 00148 * @return true if write was successful 00149 */ 00150 virtual bool read(ioHandler& handler,const bool complete=true); 00151 00152 # ifdef _LTI_MSC_6 00153 /** 00154 * this function is required by MSVC only, as a workaround for a 00155 * very awful bug, which exists since MSVC V.4.0, and still by 00156 * V.6.0 with all bugfixes (so called "service packs") remains 00157 * there... 
This method is also public due to another bug, so please 00158 * NEVER EVER call this method directly: use read() instead 00159 */ 00160 bool readMS(ioHandler& handler,const bool complete=true); 00161 00162 /** 00163 * this function is required by MSVC only, as a workaround for a 00164 * very awful bug, which exists since MSVC V.4.0, and still by 00165 * V.6.0 with all bugfixes (so called "service packs") remains 00166 * there... This method is also public due to another bug, so please 00167 * NEVER EVER call this method directly: use write() instead 00168 */ 00169 bool writeMS(ioHandler& handler,const bool complete=true) const; 00170 # endif 00171 00172 // ------------------------------------------------ 00173 // the parameters 00174 // ------------------------------------------------ 00175 00176 /** 00177 * The number of support vectors that must be used. 00178 * 00179 * Default value: 0 00180 */ 00181 int nSupport; 00182 00183 /** 00184 * The "C" parameter. The meaning is explained in the standard SVM 00185 * literature; the boiled down version is that C controls the 00186 * generalization ability of the SVM. The larger C, the more 00187 * accurate the recognition on the training set. On the other hand, 00188 * the generalization performance is better with a smaller C. 00189 * 00190 * Default value: 1 00191 */ 00192 double C; 00193 00194 /** 00195 * The kernel function. Try to use the kernel setting methods in order 00196 * to ensure a consistent memory managment of the pointed instance. 00197 * @see setKernel(),attachKernel(),useExternalKernel() 00198 * 00199 * Default value: "linearKernel" 00200 */ 00201 kernelFunctor<double>* kernel; 00202 00203 /** 00204 * The initial threshold of the SVM. 00205 * 00206 * Default value: 1 00207 */ 00208 double bias; 00209 00210 /** 00211 * The tolerance for detecting a violation of the KKT conditions. 
00212 * 00213 * Default value: 1e-3 00214 */ 00215 double tolerance; 00216 00217 /** 00218 * Epsilon for detecting change in alpha value. 00219 * 00220 * Default value: 1e-12 00221 */ 00222 double epsilon; 00223 00224 /** 00225 * Is true if the result should be normalized to sum 1 00226 * 00227 * Default value: false 00228 */ 00229 bool sumToOne; 00230 00231 /* 00232 * This parameter value is not needed anymore: If a kernel 00233 * vector is to be used, it has to be passed to the train 00234 * method anyway, so we can always know for sure, if there 00235 * is a single kernel type or a kernel for each class. 00236 * 00237 * If this flag is true, the kernel given in this object is 00238 * ignored; instead, the caller must provide with the 00239 * method lti::svm::buildKernelVector a map, which assigns to each 00240 * class id a specific kernel function. 00241 * If false, just one kernel for all used SVM will be used. 00242 * 00243 * \warning The kernel vector functionality is not ready yet, so 00244 * use always false here! 00245 * 00246 * Default value: false 00247 * 00248 bool useKernelVector; 00249 */ 00250 00251 /** 00252 * If this flag is true, the training data are normalized to 00253 * mean zero and variance one before training the svm. 00254 * This means that a copy of the training data is required. 00255 * The transformation is stored and also used for testing. 00256 * However, it is essential for SVM training that the data have 00257 * low magnitudes. If the data have a variance of 100, the 00258 * training simply may fail. 00259 * The default value is false. 00260 */ 00261 bool normalizeData; 00262 00263 /** 00264 * The normal SVM algorithm uses a one-against-all scheme for 00265 * multi-class classification. However, as is pointed out in 00266 * ..., this is suboptimal in some cases. Therefore, you can 00267 * use a combination of one-against-one classifiers (n*(n-1)/2 00268 * for n classes). 00269 * Default: false. 
00270 */ 00271 bool usePairwise; 00272 00273 private: 00274 /** 00275 * array of objects needed by the functor factory 00276 */ 00277 static const kernelFunctor<double>* kernelArray[]; 00278 00279 /** 00280 * kernel functor factory 00281 */ 00282 static objectFactory<kernelFunctor<double> > kfa; 00283 00284 /** 00285 * class name object used to detect the type name of an object instance. 00286 */ 00287 className cn; 00288 00289 /** 00290 * Flag used to inidicate if the local kernel functor must be deleted 00291 * by this object or not (just pointing to some external instance...) 00292 */ 00293 bool destroyKernel; 00294 }; 00295 00296 /** 00297 * default constructor 00298 */ 00299 svm(); 00300 00301 /** 00302 * copy constructor 00303 * @param other the object to be copied 00304 */ 00305 svm(const svm& other); 00306 00307 /** 00308 * destructor 00309 */ 00310 virtual ~svm(); 00311 00312 /** 00313 * returns the name of this type ("svm") 00314 */ 00315 virtual const char* getTypeName() const; 00316 00317 /** 00318 * copy data of "other" clustering. 00319 * @param other the clustering to be copied 00320 * @return a reference to this clustering object 00321 */ 00322 svm& copy(const svm& other); 00323 00324 /** 00325 * alias for copy member 00326 * @param other the clustering to be copied 00327 * @return a reference to this clustering object 00328 */ 00329 svm& operator=(const svm& other); 00330 00331 /** 00332 * returns a pointer to a clone of this clustering. 00333 */ 00334 virtual classifier* clone() const; 00335 00336 /** 00337 * returns used parameters 00338 */ 00339 const parameters& getParameters() const; 00340 00341 /** 00342 * Supervised training. The vectors in the <code>input</code> matrix 00343 * are arranged row-wise, i.e. each row contains one data vector. 00344 * The <code>ids</code> vector contains the class label for each row. 
00345 * This functor implements the 00346 * <a href="http://www.research.microsoft.com/~jplatt/smo.html">SMO algorithm</a> by 00347 * <a href="http://www.research.microsoft.com/~jplatt">J.C. Platt</a>. 00348 * All classes are modeled using the same kernel function, given 00349 * in the parameters object. 00350 * 00351 * @param input the matrix with the input vectors (each row is a training 00352 * vector) 00353 * @param ids vector of class ids for each input point 00354 * @return true if successful, false otherwise. (if false you can check 00355 * the error message with getStatusString()) 00356 */ 00357 virtual bool train(const dmatrix& input, 00358 const ivector& ids); 00359 00360 /** 00361 * Supervised training. The vectors in the <code>input</code> matrix 00362 * are arranged row-wise, i.e. each row contains one data vector. 00363 * The <code>ids</code> vector contains the class label for each row. 00364 * This functor implements the 00365 * <a href="http://www.research.microsoft.com/~jplatt/smo.html">SMO algorithm</a> by 00366 * <a href="http://www.research.microsoft.com/~jplatt">J.C. Platt</a>. 00367 * Each class is modeled using a separate kernel function given by 00368 * the <code>kernels</code> map. This map consists of assignments 00369 * of each class label to a kernelFunctor object. Care must be taken 00370 * that the kernel functor object ist not deallocated while the 00371 * SVM functor is in use. 00372 * 00373 * @param input the matrix with the input vectors (each row is a training 00374 * vector) 00375 * @param ids vector of class ids for each input point 00376 * @param kernels map of kernel functions. Each functor uses 00377 * @return true if successful, false otherwise. 
(if false you can check 00378 * the error message with getStatusString()) 00379 */ 00380 virtual bool train(const dmatrix& input, 00381 const ivector& ids, 00382 const std::map<int,kernelFunctor<double>*>& kernels); 00383 00384 //TODO Check whether you really need a new classify method. 00385 // In some cases the superclasses method will suffice. Then just 00386 // delete the declaration and its implementation stump. 00387 00388 /* 00389 * Classification. 00390 * Classifies the feature and returns the output %object with 00391 * the classification result. 00392 * @param feature pattern to be classified 00393 * @return result of the classifications as a classifier::output 00394 */ 00395 /* virtual const output& classify(const dvector& feature); */ 00396 00397 00398 /** 00399 * Classification. 00400 * Classifies the feature and returns the outputVector %object with 00401 * the classification result. 00402 * <p><b>NOTE:</b> This method is NOT really const. Although the main 00403 * members of the svm are not changed some state variables used for 00404 * efficiency are. Thus, it is not save to use the same instance of the 00405 * svm in two different threads. 00406 * @param feature pattern to be classified 00407 * @param result of the classifications as a classifier::outputVector 00408 * @return true if the classification has been successful 00409 */ 00410 virtual bool classify(const dvector& feature, outputVector& result) const; 00411 00412 //void classify(const dvector& feature, dvector& result) const; 00413 00414 /** 00415 * Returns the alpha matrix of the classifier. The matrix contains 00416 * one row for each class. Each row contains the alpha values of a 00417 * SVM that distinguishes this class from all other classes. 00418 */ 00419 void getAlphas(dmatrix& result) const { 00420 result=alpha; 00421 } 00422 00423 /** 00424 * Returns the bias vector of the classifier. The vector contains 00425 * the bias value for each class. 
Each element is the bias value 00426 * of a SVM that distinguishes this class from all other classes. 00427 */ 00428 void getBiases(dvector& result) const { 00429 result=bias; 00430 } 00431 00432 /** 00433 * Sets a new kernel function. The kernel which is passed here will be 00434 * copied, and the parameters instance will control the memory 00435 * management. 00436 */ 00437 void setKernel(const kernelFunctor<double>& k); 00438 00439 /** 00440 * Sets a new kernel function. The kernel which is passed here as an 00441 * argument must have been allocated with new; it must not be 00442 * a local variable. On destruction of the parameters object, 00443 * the kernel will be deleted, i.e. the parameters instance will take 00444 * control over the memory management of the kernel functor. 00445 */ 00446 void attachKernel(kernelFunctor<double>* k); 00447 00448 /** 00449 * Sets a new kernel function. The kernel which is passed here as an 00450 * argument is not deleted by the parameters object, the caller 00451 * must ensure that there are no memory leaks. 00452 */ 00453 void useExternalKernel(kernelFunctor<double>* k); 00454 00455 00456 /** 00457 * write the classifier in the given ioHandler 00458 * @param handler the ioHandler to be used 00459 * @param complete if true (the default) the enclosing begin/end will 00460 * be also written, otherwise only the data block will be written. 00461 * @return true if write was successful 00462 */ 00463 virtual bool write(ioHandler& handler,const bool complete=true) const; 00464 00465 /** 00466 * read the classifier from the given ioHandler 00467 * @param handler the ioHandler to be used 00468 * @param complete if true (the default) the enclosing begin/end will 00469 * be also written, otherwise only the data block will be written. 
00470 * @return true if write was successful 00471 */ 00472 virtual bool read(ioHandler& handler,const bool complete=true); 00473 00474 protected: 00475 void buildIdMaps(const ivector& ids); 00476 00477 void buildKernelVector(const std::map<int,kernelFunctor<double>* >& ks); 00478 00479 void buildKernelVector(); 00480 00481 virtual bool genericTrain(const dmatrix& input, 00482 const ivector& ids); 00483 00484 /** 00485 * Copy the given ids to srcIds and build the target matrix. 00486 */ 00487 void makeTargets(const ivector& ids); 00488 00489 /** 00490 * Rebuild the target matrix from srcIds. 00491 */ 00492 void rebuildTargets(); 00493 00494 ivector srcIds; 00495 00496 private: 00497 00498 bool takeStep(const int& i1, const int& i2); 00499 00500 void updateBiasError(const double& deltab); 00501 00502 void setAlpha(const int &k, const double& a); 00503 00504 bool examineExample(const int& i2); 00505 00506 double objectiveFunction(); 00507 00508 inline double outputFunction(const dvector& x, 00509 const int& currentClass ) const; 00510 00511 void fillErrorCache(); 00512 00513 void defineOutputTemplate(); 00514 00515 bool genericNormTrain(const dmatrix& input, const ivector& ids); 00516 00517 00518 // the number of classes 00519 int nClasses; 00520 00521 // the alpha matrix, one for each class 00522 // each row contains the alpha values for the corresponding 00523 // class. 00524 dmatrix alpha; 00525 00526 // the target values for each training vector 00527 // each row contains the target vector for one class 00528 const dmatrix* target; 00529 00530 // this matrix contains all training vectors. 00531 // each row contains one vector 00532 const dmatrix* trainData; 00533 00534 // parameters, can be taken from parameter object 00535 double epsilon; 00536 00537 // system epsilon, i.e. the smallest number that can be added to zero 00538 // and returns something different than zero for the type double. 
00539 const double syseps; 00540 00541 // SVM constant C that controls how strong the patterns must 00542 // be separable to allow a successful training. If C is very large, 00543 // the training patterns must be linear separable in the feature space. 00544 double C; 00545 00546 double tolerance; 00547 00548 // is modified during training, has to be saved 00549 dvector bias; 00550 00551 // this vector contains one kernel functor per class 00552 std::vector<kernelFunctor<double>*> kernels; 00553 00554 // map from external id to internal id, used for training 00555 std::map<int,int> idMap; 00556 // map from internal to external id, used for training 00557 std::map<int,int> rIdMap; 00558 00559 // this is a temporary variable during training 00560 dvector errorCache; 00561 00562 // the sum of the elements of the errorCache 00563 double errorSum; 00564 00565 // temporary variables for training the SVM 00566 double y2,e2,alph2; 00567 00568 // temporary variables for training 00569 int currentClass; 00570 dvector* currentAlpha; 00571 const dvector* currentTarget; 00572 00573 00574 // the transformation for normalizing the data 00575 dvector offset; 00576 //dvector scale; 00577 double scale; 00578 }; 00579 00580 // -------------------------------------- 00581 // inline - methods implementation 00582 // -------------------------------------- 00583 00584 inline double svm::outputFunction(const dvector& x, 00585 const int& currentClass ) const { 00586 00587 double u=0; 00588 const dvector* currentAlpha=&alpha.getRow(currentClass); 00589 const dvector* currentTarget=&target->getRow(currentClass); 00590 for (int i=0; i<currentAlpha->size(); i++) { 00591 if (currentAlpha->at(i) > 0) { 00592 u+=currentAlpha->at(i)*currentTarget->at(i)*kernels[currentClass]->apply(trainData->getRow(i),x); 00593 } 00594 } 00595 u-=bias[currentClass]; 00596 return u; 00597 } 00598 00599 } 00600 00601 #endif