LTI-Lib latest version v1.9 - last update 10 Apr 2010

ltiMSTClustering.h

00001 /*
00002  * Copyright (C) 2003, 2004, 2005, 2006
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  *
00005  * This file is part of the LTI-Computer Vision Library (LTI-Lib)
00006  *
00007  * The LTI-Lib is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License (LGPL)
00009  * as published by the Free Software Foundation; either version 2.1 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * The LTI-Lib is distributed in the hope that it will be
00013  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00014  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with the LTI-Lib; see the file LICENSE.  If
00019  * not, write to the Free Software Foundation, Inc., 59 Temple Place -
00020  * Suite 330, Boston, MA 02111-1307, USA.
00021  */
00022 
00023 
00024 /*--------------------------------------------------------------------
00025  * project ....: LTI-Lib: Image Processing and Computer Vision Library
00026  * file .......: ltiMSTClustering.h
00027  * authors ....: Jens Paustenbach
00028  * organization: LTI, RWTH Aachen
00029  * creation ...: 22.4.2003
00030  * revisions ..: $Id: ltiMSTClustering.h,v 1.14 2006/02/07 18:20:28 ltilib Exp $
00031  */
00032 
00033 #ifndef _LTI_M_S_T_CLUSTERING_H_
00034 #define _LTI_M_S_T_CLUSTERING_H_
00035 
00036 #include "ltiWeightedGraph.h"
00037 #include "ltiClustering.h"
00038 #include "ltiKdTree.h"
00039 #include "ltiL2Distance.h"
00040 
00041 namespace lti {
00042   /**
00043    * This class implements a minimum spanning tree clustering.
00044    * 
00045    * First a minimum spanning tree of the input data is computed.
00046    * Then all edges that are unusually large compared the 
00047    * neighboring edges are removed. The clusters are the connected 
00048    * components of the minimum spanning tree after the removal of 
00049    * the inconsistent edges.
00050    *
00051    * As template parameter the distantor accoding to the distance measure 
00052    * you want to use has to be specified.
00053    */
00054   template<class U=l2SquareDistantor<dvector> >
00055   class MSTClustering : public clustering {
00056   public:
00057     /**
00058      * the parameters for the class MSTClustering
00059      */
00060     class parameters : public clustering::parameters {
00061     public:
00062 
00063       /** 
00064        * The different methods that can be used to compute a probability 
00065        * from a distance.
00066        */ 
00067       enum eProbabilityFromDistanceTypes {
00068         InterClusterDistanceIndependend,
00069         InterClusterDistanceDependend
00070       };
00071 
00072       /**
00073        * default constructor
00074        */
00075       parameters() : clustering::parameters() {   
00076         devFac = 2.0;
00077         nbMaxSteps = 4;
00078         variance = 0.333333;
00079         probabilityFromDistanceMode = InterClusterDistanceDependend;
00080       }
00081 
00082       /**
00083        * copy constructor
00084        * @param other the parameters object to be copied
00085        */
00086       parameters(const parameters& other) : clustering::parameters() {
00087         copy(other);
00088       }
00089 
00090       /**
00091        * destructor
00092        */
00093       ~parameters() {
00094       }
00095 
00096       /**
00097        * returns name of this type
00098        */
00099       const char* getTypeName() const {
00100         return "MSTClustering<U>::parameters";
00101       }
00102 
00103       /**
00104        * copy the contents of a parameters object
00105        * @param other the parameters object to be copied
00106        * @return a reference to this parameters object
00107        */
00108       parameters& copy(const parameters& other) {
00109 # ifndef _LTI_MSC_6
00110         // MS Visual C++ 6 is not able to compile this...
00111         clustering::parameters::copy(other);
00112 # else
00113         // ...so we have to use this workaround.
00114         // Conditional on that, copy may not be virtual.
00115         clustering::parameters& (clustering::parameters::* p_copy)
00116           (const clustering::parameters&) =
00117           clustering::parameters::copy;
00118         (this->*p_copy)(other);
00119 # endif
00120 
00121         devFac = other.devFac;
00122         nbMaxSteps = other.nbMaxSteps;
00123         variance = other.variance;
00124         probabilityFromDistanceMode = other.probabilityFromDistanceMode;
00125 
00126 
00127         return *this;
00128       }
00129 
00130       /**
00131        * copy the contents of a parameters object
00132        * @param other the parameters object to be copied
00133        * @return a reference to this parameters object
00134        */
00135       parameters& operator=(const parameters& other) {
00136         return copy(other);
00137       }
00138 
00139       /**
00140        * returns a pointer to a clone of the parameters
00141        */
00142       virtual classifier::parameters* clone() const {
00143         return new parameters(*this);
00144       }
00145 
00146 # ifndef _LTI_MSC_6
00147       /**
00148        * write the parameters in the given ioHandler
00149        * @param handler the ioHandler to be used
00150        * @param complete if true (the default) the enclosing begin/end will
00151        *        be also written, otherwise only the data block will be written.
00152        * @return true if write was successful
00153        */
00154       bool write(ioHandler& handler,const bool complete=true) const
00155 # else
00156       /**
00157        * this function is required by MSVC only, as a workaround for a
00158        * very awful bug, which exists since MSVC V.4.0, and still by
00159        * V.6.0 with all bugfixes (so called "service packs") remains
00160        * there...  This method is also public due to another bug, so please
00161        * NEVER EVER call this method directly: use write() instead
00162        */
00163       bool writeMS(ioHandler& handler,const bool complete=true) const
00164 # endif
00165       {
00166         bool b = true;
00167         if (complete) {
00168           b = handler.writeBegin();
00169         }
00170 
00171         if (b) {
00172           lti::write(handler,"devFac",devFac);
00173           lti::write(handler,"nbMaxSteps",nbMaxSteps);
00174           lti::write(handler,"variance",variance);
00175           if (probabilityFromDistanceMode==InterClusterDistanceIndependend) {
00176             lti::write(handler,"probabilityFromDistanceMode",
00177                        "InterClusterDistanceIndependend");
00178           } else {
00179             lti::write(handler,"probabilityFromDistanceMode",
00180                        "InterClusterDistanceDependend");
00181           }
00182 
00183         }
00184 
00185 # ifndef _LTI_MSC_6
00186         // This is the standard C++ code, which MS Visual C++ 6 is not able to
00187         // compile...
00188         b = b && clustering::parameters::write(handler,false);
00189 # else
00190         bool (clustering::parameters::* p_writeMS)(ioHandler&,const bool) const =
00191           clustering::parameters::writeMS;
00192         b = b && (this->*p_writeMS)(handler,false);
00193 # endif
00194 
00195         if (complete) {
00196           b = b && handler.writeEnd();
00197         }
00198 
00199         return b;
00200       }
00201 
00202 # ifdef _LTI_MSC_6
00203       /**
00204        * write the parameters in the given ioHandler
00205        * @param handler the ioHandler to be used
00206        * @param complete if true (the default) the enclosing begin/end will
00207        *        be also written, otherwise only the data block will be written.
00208        * @return true if write was successful
00209        */
00210       bool write(ioHandler& handler,const bool complete=true) const {
00211         // ...we need this workaround to cope with another really awful MSVC bug.
00212         return writeMS(handler,complete);
00213       }
00214 # endif
00215 
00216 # ifndef _LTI_MSC_6
00217       /**
00218        * read the parameters from the given ioHandler
00219        * @param handler the ioHandler to be used
00220        * @param complete if true (the default) the enclosing begin/end will
00221        *        be also written, otherwise only the data block will be written.
00222        * @return true if write was successful
00223        */
00224       bool read(ioHandler& handler,const bool complete=true)
00225 # else
00226       /**
00227        * this function is required by MSVC only, as a workaround for a
00228        * very awful bug, which exists since MSVC V.4.0, and still by
00229        * V.6.0 with all bugfixes (so called "service packs") remains
00230        * there...  This method is also public due to another bug, so please
00231        * NEVER EVER call this method directly: use read() instead
00232        */
00233       bool readMS(ioHandler& handler,const bool complete=true)
00234 # endif
00235       {
00236         bool b = true;
00237         if (complete) {
00238           b = handler.readBegin();
00239         }
00240 
00241         if (b) {
00242       
00243           lti::read(handler,"devFac",devFac);
00244           lti::read(handler,"nbMaxSteps",nbMaxSteps);
00245           lti::read(handler,"variance",variance);
00246           std::string str;
00247           lti::read(handler,"probabilityFromDistanceMode",str);
00248           if (str=="InterClusterDistanceIndependend") {
00249             probabilityFromDistanceMode=InterClusterDistanceIndependend;
00250           } else {
00251             probabilityFromDistanceMode=InterClusterDistanceDependend;
00252           }
00253         }
00254 
00255 # ifndef _LTI_MSC_6
00256         // This is the standard C++ code, which MS Visual C++ 6 is not able to
00257         // compile...
00258         b = b && clustering::parameters::read(handler,false);
00259 # else
00260         bool (clustering::parameters::* p_readMS)(ioHandler&,const bool) =
00261           clustering::parameters::readMS;
00262         b = b && (this->*p_readMS)(handler,false);
00263 # endif
00264 
00265         if (complete) {
00266           b = b && handler.readEnd();
00267         }
00268 
00269         return b;
00270       }
00271 
00272 # ifdef _LTI_MSC_6
00273       bool read(ioHandler& handler,const bool complete=true) {
00274         // ...we need this workaround to cope with another really awful MSVC
00275         // bug.
00276         return readMS(handler,complete);
00277       }
00278 # endif
00279 
00280       // ------------------------------------------------
00281       // the parameters
00282       // ------------------------------------------------
00283 
00284       /**
00285        * Factor that is used to decide if an edge is inconsistent.
00286        * First all edges are searched that lie at most nbMaxSteps away 
00287        * from the current node. Then m is the mean of the weights of all 
00288        * these edges and sigma is the standard deviation of these edges.
00289        * An Edge is considered to be in consistent if is weight is bigger than 
00290        * mean+devFac*sigma.
00291        * Default is 2.0
00292        */
00293       float devFac;
00294 
00295       /**
00296        * The maximum number of edges that can be between the starting point
00297        * and the last edge that is considered when computed inconsistent edges.
00298        * Default is 4.
00299        */
00300       int nbMaxSteps;
00301 
00302       /** 
00303        * Variance used to compute a possibility from the distances 
00304        * for classification. 
00305        * While classification, the membership of feature to all clusters 
00306        * is computed. This done by the following: 
00307        * Say d is the minimal distance between the feature that is classified
00308        * and one of the clusters. Say also w is the the minimal distance 
00309        * between the current cluster the nearest neighboring cluster, 
00310        * then the membership is defined either 
00311        * \f$ e^{-\frac{d^2}{\sigma^2w^2}} \f$ or 
00312        * \f$ e^{-\frac{d^2}{\sigma^2}} \f$ depending on the value of 
00313        * probabilityFromDistanceMode
00314        * Default is 0.33333.
00315        */
00316       double variance;
00317 
00318       /** 
00319        * The method used to compute a probabilty from the distance between 
00320        * the feature that is classified and the clusters. 
00321        * Valid valued are: InterClusterDistanceDependend and 
00322        * InterClusterDistanceIndependend.
00323        * See parameter variance
00324        */ 
00325       eProbabilityFromDistanceTypes probabilityFromDistanceMode;
00326     };
00327 
00328     /**
00329      * default constructor
00330      */
00331     MSTClustering();
00332 
00333     /**
00334      * copy constructor
00335      * @param other the object to be copied
00336      */
00337     MSTClustering(const MSTClustering& other);
00338 
00339     /**
00340      * destructor
00341      */
00342     virtual ~MSTClustering();
00343 
00344     /**
00345      * returns the name of this type ("MSTClustering")
00346      */
00347     virtual const char* getTypeName() const;
00348 
00349     /**
00350      * copy data of "other" clustering.
00351      * @param other the clustering to be copied
00352      * @return a reference to this clustering object
00353      */
00354     MSTClustering& copy(const MSTClustering& other);
00355 
00356     /**
00357      * alias for copy member
00358      * @param other the clustering to be copied
00359      * @return a reference to this clustering object
00360      */
00361     MSTClustering& operator=(const MSTClustering& other);
00362 
00363     /**
00364      * returns a pointer to a clone of this clustering.
00365      */
00366     virtual classifier* clone() const;
00367 
00368     /**
00369      * returns used parameters
00370      */
00371     const parameters& getParameters() const;
00372 
00373     /**
00374      * Unsupervised training.  The vectors in the <code>input</code>
00375      * matrix will be put into groups according to the training
00376      * algorithm.  Additionally, an integer indicating the class each
00377      * point belongs to is returned. <p> By default this method uses
00378      * the other train method train(const dmatrix&) and then
00379      * calls classify(const dvector&) to get the ids for each
00380      * train vector. These ids are then returned.
00381      * @param input the matrix with the input vectors (each row is a training
00382      *              vector)
00383      * @param ids vector of class ids for each input point
00384      * @return true if successful, false otherwise. (if false you can check
00385      *              the error message with getStatusString())
00386      */
00387     virtual bool train(const dmatrix& input,
00388                        ivector& ids);
00389 
00390     /**
00391      * Unsupervised training.
00392      * The vectors in the <code>input</code> matrix
00393      * will be clustered using each specific method.
00394      * @param input the matrix with the input vectors (each row is a training
00395      *              vector)
00396      * @return true if successful, false otherwise. (if false you can check
00397      *              the error message with getStatusString())
00398      */
00399     virtual bool train(const dmatrix& input);
00400 
00401     /**
00402      * Classification.
00403      * Classifies the feature and returns the outputVector with
00404      * the classification result.
00405      * @param feature the %vector to be classified
00406      * @param result the result of the classification
00407      * @return false if an error occurred during classification else true
00408      */
00409     virtual bool
00410       classify(const dvector& feature, outputVector& result) const;
00411 
00412   /**
00413    * read the parameters from the given ioHandler
00414    * @param handler the ioHandler to be used
00415    * @param complete if true (the default) the enclosing begin/end will
00416    *        be also read, otherwise only the data block will be read.
00417    * @return true if write was successful
00418    */
00419    virtual bool write (ioHandler &handler, const bool complete=true) const;
00420 
00421   /**
00422    * read the parameters from the given ioHandler
00423    * @param handler the ioHandler to be used
00424    * @param complete if true (the default) the enclosing begin/end will
00425    *        be also read, otherwise only the data block will be read.
00426    * @return true if write was successful
00427    */
00428   virtual bool read (ioHandler &handler, const bool complete=true);
00429 
00430   protected:
00431     /** 
00432      * collects the weights of all edge starting from node with id b 
00433      * until it reaches the given depth. a is used to make sure that 
00434      * the function does not run back the way same way he came from.
00435      */
00436     void collectDists(const int a,const int b,
00437                      std::list<std::pair<int,int> >& dists,int depth);
00438 
00439     /**
00440      * searchs for clusters in the graph after the inconsistent edges are 
00441      * deleted.
00442      */  
00443    void findClusters(const int from,int& curId,int& nextId,
00444                    weightedGraph<vector<double>,int,double>::node*& startNode,
00445                      vector<int>& ids,std::vector<int>& startNodes);
00446     /** 
00447      * The graph that contains the tree and the edges.
00448      */
00449    weightedGraph<vector<double>,int,double> gr1;
00450 
00451     /**
00452      * After the training process the vector contains under each id a 
00453      * kdTree with all points that are in the cluster with the id 
00454      * corresponding with the position in the vector
00455      */
00456    std::vector<kdTree<dvector,int,U >* > clusters;
00457 
00458     /** 
00459      * After the training process this map contains for each cluster 
00460      * with the given id the minimal distance to its nearest 
00461      * neighboring cluster.
00462      */
00463    std::map<int,double> minDistances;
00464 
00465     /** 
00466      * The distantor used computing distances.
00467      */
00468    U distantor;
00469 
00470 
00471   };
00472 }
00473 
00474 #include "ltiMSTClustering_template.h"
00475 
00476 #endif

Generated on Sat Apr 10 15:25:53 2010 for LTI-Lib by Doxygen 1.6.1