ltiSVM.h

00001 /*
00002  * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  *
00005  * This file is part of the LTI-Computer Vision Library (LTI-Lib)
00006  *
00007  * The LTI-Lib is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License (LGPL)
00009  * as published by the Free Software Foundation; either version 2.1 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * The LTI-Lib is distributed in the hope that it will be
00013  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00014  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with the LTI-Lib; see the file LICENSE.  If
00019  * not, write to the Free Software Foundation, Inc., 59 Temple Place -
00020  * Suite 330, Boston, MA 02111-1307, USA.
00021  */
00022 
00023 
00024 /*----------------------------------------------------------------
00025  * project ....: LTI Digital Image/Signal Processing Library
00026  * file .......: ltiSvm.h
00027  * authors ....: Jochen Wickel
00028  * organization: LTI, RWTH Aachen
00029  * creation ...: 30.10.2001
00030  * revisions ..: $Id: ltiSVM.h,v 1.7 2006/02/07 18:24:45 ltilib Exp $
00031  */
00032 
00033 #ifndef _LTI_SVM_H_
00034 #define _LTI_SVM_H_
00035 
00036 #include "ltiSupervisedInstanceClassifier.h"
00037 #include "ltiObjectFactory.h"
00038 #include "ltiKernelFunctor.h"
00039 #include "ltiLinearKernel.h"
00040 #include "ltiClassName.h"
00041 
00042 namespace lti {
00043   /**
00044    * Implements a support vector machine (SVM). The SVM is trained
00045    * using the SMO algorithm (sequential minimal optimization) by
00046    * J.C. Platt (presented at NIPS 1999).
00047    *
00048    * The kernel function is determined by an %object of type
00049    * kernelFunctor<double>. svm contains an inner class called
00050    * linearKernel which is the default. If your application requires another
00051    * kernel function, you have to find one in another place.
00052    */
00053   class svm : public supervisedInstanceClassifier {
00054   public:
00055 
00056     /**
00057      * the parameters for the class svm
00058      */
00059     class parameters : public supervisedInstanceClassifier::parameters {
00060     public:
00061 
00062       /**
00063        * default constructor
00064        */
00065       parameters();
00066 
00067       /**
00068        * copy constructor
00069        * @param other the parameters object to be copied
00070        */
00071       parameters(const parameters& other);
00072 
00073       /**
00074        * destructor
00075        */
00076       virtual ~parameters();
00077 
00078       /**
00079        * returns name of this type.
00080        */
00081       const char* getTypeName() const;
00082 
00083       /**
00084        * copy the contents of a parameters object
00085        * @param other the parameters object to be copied
00086        * @return a reference to this parameters object
00087        */
00088       parameters& copy(const parameters& other);
00089 
00090       /**
00091        * copy the contents of a parameters object
00092        * @param other the parameters object to be copied
00093        * @return a reference to this parameters object
00094        */
00095       parameters& operator=(const parameters& other);
00096 
00097 
00098       /**
00099        * returns a pointer to a clone of the parameters
00100        */
00101       virtual classifier::parameters* clone() const;
00102 
00103       /**
00104        * Sets a new kernel function.  A copy of the kernel will be
00105        * done (so it is useless to change the parameters of the given
00106        * kernel instance, because the internal kernel will never
00107        * notice the changes done to its "parent").
00108        */
00109       void setKernel(const kernelFunctor<double>& k);
00110 
00111       /**
00112        * Sets a new kernel function. The kernel which is passed here as an
00113        * argument must have been allocated with new; it must not be
00114        * a local variable. On destruction of the parameters object,
00115        * the kernel will be deleted, i.e. this parameters instance will be
00116        * responsible for the memory managment of the object.
00117        */
00118       void attachKernel(kernelFunctor<double>* k);
00119 
00120       /**
00121        * Sets a new kernel function. The kernel which is passed here as an
00122        * argument is not deleted by the parameters object, the caller
00123        * must ensure that there are no memory leaks.
00124        */
00125       void useExternalKernel(kernelFunctor<double>* k);
00126 
00127       /**
00128        * create a new kernel function, using its class name. A pointer to the
00129        * kernel function is returned, so that the caller may modify
00130        * the kernel's parameters.
00131        */
00132       kernelFunctor<double>* createKernel(const std::string& name) const;
00133 
00134       /**
00135        * write the parameters in the given ioHandler
00136        * @param handler the ioHandler to be used
00137        * @param complete if true (the default) the enclosing begin/end will
00138        *        be also written, otherwise only the data block will be written.
00139        * @return true if write was successful
00140        */
00141       virtual bool write(ioHandler& handler,const bool complete=true) const;
00142 
00143       /**
00144        * read the parameters from the given ioHandler
00145        * @param handler the ioHandler to be used
00146        * @param complete if true (the default) the enclosing begin/end will
00147        *        be also written, otherwise only the data block will be written.
00148        * @return true if write was successful
00149        */
00150       virtual bool read(ioHandler& handler,const bool complete=true);
00151 
00152 #     ifdef _LTI_MSC_6
00153       /**
00154        * this function is required by MSVC only, as a workaround for a
00155        * very awful bug, which exists since MSVC V.4.0, and still by
00156        * V.6.0 with all bugfixes (so called "service packs") remains
00157        * there...  This method is also public due to another bug, so please
00158        * NEVER EVER call this method directly: use read() instead
00159        */
00160       bool readMS(ioHandler& handler,const bool complete=true);
00161 
00162       /**
00163        * this function is required by MSVC only, as a workaround for a
00164        * very awful bug, which exists since MSVC V.4.0, and still by
00165        * V.6.0 with all bugfixes (so called "service packs") remains
00166        * there...  This method is also public due to another bug, so please
00167        * NEVER EVER call this method directly: use write() instead
00168        */
00169       bool writeMS(ioHandler& handler,const bool complete=true) const;
00170 #     endif
00171 
00172       // ------------------------------------------------
00173       // the parameters
00174       // ------------------------------------------------
00175 
00176       /**
00177        * The number of support vectors that must be used.
00178        *
00179        * Default value: 0
00180        */
00181       int nSupport;
00182 
00183       /**
00184        * The "C" parameter. The meaning is explained in the standard SVM
00185        * literature; the boiled down version is that C controls the
00186        * generalization ability of the SVM. The larger C, the more
00187        * accurate the recognition on the training set. On the other hand,
00188        * the generalization performance is better with a smaller C.
00189        *
00190        * Default value: 1
00191        */
00192       double C;
00193 
00194       /**
00195        * The kernel function.  Try to use the kernel setting methods in order
00196        * to ensure a consistent memory managment of the pointed instance.
00197        * @see setKernel(),attachKernel(),useExternalKernel()
00198        *
00199        * Default value: "linearKernel"
00200        */
00201       kernelFunctor<double>* kernel;
00202 
00203       /**
00204        * The initial threshold of the SVM.
00205        *
00206        * Default value: 1
00207        */
00208       double bias;
00209 
00210       /**
00211        * The tolerance for detecting a violation of the KKT conditions.
00212        *
00213        * Default value: 1e-3
00214        */
00215       double tolerance;
00216 
00217       /**
00218        * Epsilon for detecting change in alpha value.
00219        *
00220        * Default value: 1e-12
00221        */
00222       double epsilon;
00223 
00224       /**
00225        * Is true if the result should be normalized to sum 1
00226        *
00227        * Default value: false
00228        */
00229       bool sumToOne;
00230 
00231       /*
00232        * This parameter value is not needed anymore: If a kernel
00233        * vector is to be used, it has to be passed to the train
00234        * method anyway, so we can always know for sure, if there
00235        * is a single kernel type or a kernel for each class.
00236        *
00237        * If this flag is true, the kernel given in this object is
00238        * ignored; instead, the caller must provide with the
00239        * method lti::svm::buildKernelVector a map, which assigns to each
00240        * class id a specific kernel function.
00241        * If false, just one kernel for all used SVM will be used.
00242        *
00243        * \warning The kernel vector functionality is not ready yet, so
00244        * use always false here!
00245        *
00246        * Default value: false
00247        *
00248       bool useKernelVector;
00249       */
00250 
00251       /**
00252        * If this flag is true, the training data are normalized to
00253        * mean zero and variance one before training the svm.
00254        * This means that a copy of the training data is required.
00255        * The transformation is stored and also used for testing.
00256        * However, it is essential for SVM training that the data have
00257        * low magnitudes. If the data have a variance of 100, the
00258        * training simply may fail.
00259        * The default value is false.
00260        */
00261       bool normalizeData;
00262 
00263       /**
00264        * The normal SVM algorithm uses a one-against-all scheme for
00265        * multi-class classification. However, as is pointed out in
00266        * ..., this is suboptimal in some cases. Therefore, you can
00267        * use a combination of one-against-one classifiers (n*(n-1)/2
00268        * for n classes).
00269        * Default: false.
00270        */
00271       bool usePairwise;
00272 
00273     private:
00274       /**
00275        * array of objects needed by the functor factory
00276        */
00277       static const kernelFunctor<double>* kernelArray[];
00278 
00279       /**
00280        * kernel functor factory
00281        */
00282       static objectFactory<kernelFunctor<double> > kfa;
00283 
00284       /**
00285        * class name object used to detect the type name of an object instance.
00286        */
00287       className cn;
00288 
00289       /**
00290        * Flag used to inidicate if the local kernel functor must be deleted
00291        * by this object or not (just pointing to some external instance...)
00292        */
00293       bool destroyKernel;
00294     };
00295 
00296     /**
00297      * default constructor
00298      */
00299     svm();
00300 
00301     /**
00302      * copy constructor
00303      * @param other the object to be copied
00304      */
00305     svm(const svm& other);
00306 
00307     /**
00308      * destructor
00309      */
00310     virtual ~svm();
00311 
00312     /**
00313      * returns the name of this type ("svm")
00314      */
00315     virtual const char* getTypeName() const;
00316 
00317     /**
00318      * copy data of "other" clustering.
00319      * @param other the clustering to be copied
00320      * @return a reference to this clustering object
00321      */
00322     svm& copy(const svm& other);
00323 
00324     /**
00325      * alias for copy member
00326      * @param other the clustering to be copied
00327      * @return a reference to this clustering object
00328      */
00329     svm& operator=(const svm& other);
00330 
00331     /**
00332      * returns a pointer to a clone of this clustering.
00333      */
00334     virtual classifier* clone() const;
00335 
00336     /**
00337      * returns used parameters
00338      */
00339     const parameters& getParameters() const;
00340 
00341     /**
00342      * Supervised training.  The vectors in the <code>input</code> matrix
00343      * are arranged row-wise, i.e. each row contains one data vector.
00344      * The <code>ids</code> vector contains the class label for each row.
00345      * This functor implements the
00346      * <a href="http://www.research.microsoft.com/~jplatt/smo.html">SMO algorithm</a> by
00347      * <a href="http://www.research.microsoft.com/~jplatt">J.C. Platt</a>.
00348      * All classes are modeled using the same kernel function, given
00349      * in the parameters object.
00350      *
00351      * @param input the matrix with the input vectors (each row is a training
00352      *              vector)
00353      * @param ids vector of class ids for each input point
00354      * @return true if successful, false otherwise. (if false you can check
00355      *              the error message with getStatusString())
00356      */
00357     virtual bool train(const dmatrix& input,
00358                        const ivector& ids);
00359 
00360     /**
00361      * Supervised training.  The vectors in the <code>input</code> matrix
00362      * are arranged row-wise, i.e. each row contains one data vector.
00363      * The <code>ids</code> vector contains the class label for each row.
00364      * This functor implements the
00365      * <a href="http://www.research.microsoft.com/~jplatt/smo.html">SMO algorithm</a> by
00366      * <a href="http://www.research.microsoft.com/~jplatt">J.C. Platt</a>.
00367      * Each class is modeled using a separate kernel function given by
00368      * the <code>kernels</code> map. This map consists of assignments
00369      * of each class label to a kernelFunctor object. Care must be taken
00370      * that the kernel functor object ist not deallocated while the
00371      * SVM functor is in use.
00372      *
00373      * @param input the matrix with the input vectors (each row is a training
00374      *              vector)
00375      * @param ids vector of class ids for each input point
00376      * @param kernels map of kernel functions. Each functor uses
00377      * @return true if successful, false otherwise. (if false you can check
00378      *              the error message with getStatusString())
00379      */
00380     virtual bool train(const dmatrix& input,
00381                        const ivector& ids,
00382                        const std::map<int,kernelFunctor<double>*>& kernels);
00383 
00384     //TODO Check whether you really need a new classify method.
00385     // In some cases the superclasses method will suffice. Then just
00386     // delete the declaration and its implementation stump.
00387 
00388     /*
00389      * Classification.
00390      * Classifies the feature and returns the output %object with
00391      * the classification result.
00392      * @param feature pattern to be classified
00393      * @return result of the classifications as a classifier::output
00394      */
00395 /*      virtual const output& classify(const dvector& feature); */
00396 
00397 
00398     /**
00399      * Classification.
00400      * Classifies the feature and returns the outputVector %object with
00401      * the classification result.
00402      * <p><b>NOTE:</b> This method is NOT really const. Although the main
00403      * members of the svm are not changed some state variables used for
00404      * efficiency are. Thus, it is not save to use the same instance of the
00405      * svm in two different threads.
00406      * @param feature pattern to be classified
00407      * @param result of the classifications as a classifier::outputVector
00408      * @return true if the classification has been successful
00409      */
00410     virtual bool classify(const dvector& feature, outputVector& result) const;
00411 
00412     //void classify(const dvector& feature, dvector& result) const;
00413 
00414     /**
00415      * Returns the alpha matrix of the classifier. The matrix contains
00416      * one row for each class. Each row contains the alpha values of a
00417      * SVM that distinguishes this class from all other classes.
00418      */
00419     void getAlphas(dmatrix& result) const {
00420       result=alpha;
00421     }
00422 
00423     /**
00424      * Returns the bias vector of the classifier. The vector contains
00425      * the bias value for each class. Each element is the bias value
00426      * of a SVM that distinguishes this class from all other classes.  
00427      */
00428     void getBiases(dvector& result) const {
00429       result=bias;
00430     }
00431 
00432     /**
00433      * Sets a new kernel function. The kernel which is passed here will be
00434      * copied, and the parameters instance will control the memory
00435      * management.
00436      */
00437     void setKernel(const kernelFunctor<double>& k);
00438 
00439     /**
00440      * Sets a new kernel function. The kernel which is passed here as an
00441      * argument must have been allocated with new; it must not be
00442      * a local variable. On destruction of the parameters object,
00443      * the kernel will be deleted, i.e. the parameters instance will take
00444      * control over the memory management of the kernel functor.
00445      */
00446     void attachKernel(kernelFunctor<double>* k);
00447 
00448     /**
00449      * Sets a new kernel function. The kernel which is passed here as an
00450      * argument is not deleted by the parameters object, the caller
00451      * must ensure that there are no memory leaks.
00452      */
00453     void useExternalKernel(kernelFunctor<double>* k);
00454 
00455 
00456     /**
00457      * write the classifier in the given ioHandler
00458      * @param handler the ioHandler to be used
00459      * @param complete if true (the default) the enclosing begin/end will
00460      *        be also written, otherwise only the data block will be written.
00461      * @return true if write was successful
00462      */
00463     virtual bool write(ioHandler& handler,const bool complete=true) const;
00464 
00465     /**
00466      * read the classifier from the given ioHandler
00467      * @param handler the ioHandler to be used
00468      * @param complete if true (the default) the enclosing begin/end will
00469      *        be also written, otherwise only the data block will be written.
00470      * @return true if write was successful
00471      */
00472     virtual bool read(ioHandler& handler,const bool complete=true);
00473 
00474   protected:
00475     void buildIdMaps(const ivector& ids);
00476 
00477     void buildKernelVector(const std::map<int,kernelFunctor<double>* >& ks);
00478 
00479     void buildKernelVector();
00480 
00481     virtual bool genericTrain(const dmatrix& input,
00482                               const ivector& ids);
00483 
00484     /**
00485      * Copy the given ids to srcIds and build the target matrix.
00486      */
00487     void makeTargets(const ivector& ids);
00488 
00489     /**
00490      * Rebuild the target matrix from srcIds.
00491      */
00492     void rebuildTargets();
00493 
00494     ivector srcIds;
00495 
00496   private:
00497 
00498     bool takeStep(const int& i1, const int& i2);
00499 
00500     void updateBiasError(const double& deltab);
00501 
00502     void setAlpha(const int &k, const double& a);
00503 
00504     bool examineExample(const int& i2);
00505 
00506     double objectiveFunction();
00507 
00508     inline double outputFunction(const dvector& x,
00509                                  const int& currentClass ) const;
00510 
00511     void fillErrorCache();
00512 
00513     void defineOutputTemplate();
00514 
00515     bool genericNormTrain(const dmatrix& input, const ivector& ids);
00516 
00517 
00518     // the number of classes
00519     int nClasses;
00520 
00521     // the alpha matrix, one for each class
00522     // each row contains the alpha values for the corresponding
00523     // class.
00524     dmatrix alpha;
00525 
00526     // the target values for each training vector
00527     // each row contains the target vector for one class
00528     const dmatrix* target;
00529 
00530     // this matrix contains all training vectors.
00531     // each row contains one vector
00532     const dmatrix* trainData;
00533 
00534     // parameters, can be taken from parameter object
00535     double epsilon;
00536 
00537     // system epsilon, i.e. the smallest number that can be added to zero
00538     // and returns something different than zero for the type double.
00539     const double syseps;
00540 
00541     // SVM constant C that controls how strong the patterns must
00542     // be separable to allow a successful training.  If C is very large,
00543     // the training patterns must be linear separable in the feature space.
00544     double C;
00545 
00546     double tolerance;
00547 
00548     // is modified during training, has to be saved
00549     dvector bias;
00550 
00551     // this vector contains one kernel functor per class
00552     std::vector<kernelFunctor<double>*> kernels;
00553 
00554     // map from external id to internal id, used for training
00555     std::map<int,int> idMap;
00556     // map from internal to external id, used for training
00557     std::map<int,int> rIdMap;
00558 
00559     // this is a temporary variable during training
00560     dvector errorCache;
00561 
00562     // the sum of the elements of the errorCache
00563     double errorSum;
00564 
00565     // temporary variables for training the SVM
00566     double y2,e2,alph2;
00567 
00568     // temporary variables for training
00569     int currentClass;
00570     dvector* currentAlpha;
00571     const dvector* currentTarget;
00572 
00573 
00574     // the transformation for normalizing the data
00575     dvector offset;
00576     //dvector scale;
00577     double scale;
00578   };
00579 
00580   // --------------------------------------
00581   // inline - methods implementation
00582   // --------------------------------------
00583 
00584   inline double svm::outputFunction(const dvector& x,
00585                                     const int& currentClass ) const {
00586 
00587     double u=0;
00588     const dvector* currentAlpha=&alpha.getRow(currentClass);
00589     const dvector* currentTarget=&target->getRow(currentClass);
00590     for (int i=0; i<currentAlpha->size(); i++) {
00591       if (currentAlpha->at(i) > 0) {
00592         u+=currentAlpha->at(i)*currentTarget->at(i)*kernels[currentClass]->apply(trainData->getRow(i),x);
00593       }
00594     }
00595     u-=bias[currentClass];
00596     return u;
00597   }
00598 
00599 }
00600 
00601 #endif