latest version v1.9 - last update 10 Apr 2010 |
00001 /* 00002 * Copyright (C) 2003, 2004, 2005, 2006 00003 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany 00004 * 00005 * This file is part of the LTI-Computer Vision Library (LTI-Lib) 00006 * 00007 * The LTI-Lib is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public License (LGPL) 00009 * as published by the Free Software Foundation; either version 2.1 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * The LTI-Lib is distributed in the hope that it will be 00013 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00014 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with the LTI-Lib; see the file LICENSE. If 00019 * not, write to the Free Software Foundation, Inc., 59 Temple Place - 00020 * Suite 330, Boston, MA 02111-1307, USA. 00021 */ 00022 00023 00024 /*-------------------------------------------------------------------- 00025 * project ....: LTI-Lib: Image Processing and Computer Vision Library 00026 * file .......: ltiMSTClustering.h 00027 * authors ....: Jens Paustenbach 00028 * organization: LTI, RWTH Aachen 00029 * creation ...: 22.4.2003 00030 * revisions ..: $Id: ltiMSTClustering.h,v 1.14 2006/02/07 18:20:28 ltilib Exp $ 00031 */ 00032 00033 #ifndef _LTI_M_S_T_CLUSTERING_H_ 00034 #define _LTI_M_S_T_CLUSTERING_H_ 00035 00036 #include "ltiWeightedGraph.h" 00037 #include "ltiClustering.h" 00038 #include "ltiKdTree.h" 00039 #include "ltiL2Distance.h" 00040 00041 namespace lti { 00042 /** 00043 * This class implements a minimum spanning tree clustering. 00044 * 00045 * First a minimum spanning tree of the input data is computed. 00046 * Then all edges that are unusually large compared the 00047 * neighboring edges are removed. The clusters are the connected 00048 * components of the minimum spanning tree after the removal of 00049 * the inconsistent edges. 00050 * 00051 * As template parameter the distantor accoding to the distance measure 00052 * you want to use has to be specified. 00053 */ 00054 template<class U=l2SquareDistantor<dvector> > 00055 class MSTClustering : public clustering { 00056 public: 00057 /** 00058 * the parameters for the class MSTClustering 00059 */ 00060 class parameters : public clustering::parameters { 00061 public: 00062 00063 /** 00064 * The different methods that can be used to compute a probability 00065 * from a distance. 00066 */ 00067 enum eProbabilityFromDistanceTypes { 00068 InterClusterDistanceIndependend, 00069 InterClusterDistanceDependend 00070 }; 00071 00072 /** 00073 * default constructor 00074 */ 00075 parameters() : clustering::parameters() { 00076 devFac = 2.0; 00077 nbMaxSteps = 4; 00078 variance = 0.333333; 00079 probabilityFromDistanceMode = InterClusterDistanceDependend; 00080 } 00081 00082 /** 00083 * copy constructor 00084 * @param other the parameters object to be copied 00085 */ 00086 parameters(const parameters& other) : clustering::parameters() { 00087 copy(other); 00088 } 00089 00090 /** 00091 * destructor 00092 */ 00093 ~parameters() { 00094 } 00095 00096 /** 00097 * returns name of this type 00098 */ 00099 const char* getTypeName() const { 00100 return "MSTClustering<U>::parameters"; 00101 } 00102 00103 /** 00104 * copy the contents of a parameters object 00105 * @param other the parameters object to be copied 00106 * @return a reference to this parameters object 00107 */ 00108 parameters& copy(const parameters& other) { 00109 # ifndef _LTI_MSC_6 00110 // MS Visual C++ 6 is not able to compile this... 00111 clustering::parameters::copy(other); 00112 # else 00113 // ...so we have to use this workaround. 00114 // Conditional on that, copy may not be virtual. 00115 clustering::parameters& (clustering::parameters::* p_copy) 00116 (const clustering::parameters&) = 00117 clustering::parameters::copy; 00118 (this->*p_copy)(other); 00119 # endif 00120 00121 devFac = other.devFac; 00122 nbMaxSteps = other.nbMaxSteps; 00123 variance = other.variance; 00124 probabilityFromDistanceMode = other.probabilityFromDistanceMode; 00125 00126 00127 return *this; 00128 } 00129 00130 /** 00131 * copy the contents of a parameters object 00132 * @param other the parameters object to be copied 00133 * @return a reference to this parameters object 00134 */ 00135 parameters& operator=(const parameters& other) { 00136 return copy(other); 00137 } 00138 00139 /** 00140 * returns a pointer to a clone of the parameters 00141 */ 00142 virtual classifier::parameters* clone() const { 00143 return new parameters(*this); 00144 } 00145 00146 # ifndef _LTI_MSC_6 00147 /** 00148 * write the parameters in the given ioHandler 00149 * @param handler the ioHandler to be used 00150 * @param complete if true (the default) the enclosing begin/end will 00151 * be also written, otherwise only the data block will be written. 00152 * @return true if write was successful 00153 */ 00154 bool write(ioHandler& handler,const bool complete=true) const 00155 # else 00156 /** 00157 * this function is required by MSVC only, as a workaround for a 00158 * very awful bug, which exists since MSVC V.4.0, and still by 00159 * V.6.0 with all bugfixes (so called "service packs") remains 00160 * there... This method is also public due to another bug, so please 00161 * NEVER EVER call this method directly: use write() instead 00162 */ 00163 bool writeMS(ioHandler& handler,const bool complete=true) const 00164 # endif 00165 { 00166 bool b = true; 00167 if (complete) { 00168 b = handler.writeBegin(); 00169 } 00170 00171 if (b) { 00172 lti::write(handler,"devFac",devFac); 00173 lti::write(handler,"nbMaxSteps",nbMaxSteps); 00174 lti::write(handler,"variance",variance); 00175 if (probabilityFromDistanceMode==InterClusterDistanceIndependend) { 00176 lti::write(handler,"probabilityFromDistanceMode", 00177 "InterClusterDistanceIndependend"); 00178 } else { 00179 lti::write(handler,"probabilityFromDistanceMode", 00180 "InterClusterDistanceDependend"); 00181 } 00182 00183 } 00184 00185 # ifndef _LTI_MSC_6 00186 // This is the standard C++ code, which MS Visual C++ 6 is not able to 00187 // compile... 00188 b = b && clustering::parameters::write(handler,false); 00189 # else 00190 bool (clustering::parameters::* p_writeMS)(ioHandler&,const bool) const = 00191 clustering::parameters::writeMS; 00192 b = b && (this->*p_writeMS)(handler,false); 00193 # endif 00194 00195 if (complete) { 00196 b = b && handler.writeEnd(); 00197 } 00198 00199 return b; 00200 } 00201 00202 # ifdef _LTI_MSC_6 00203 /** 00204 * write the parameters in the given ioHandler 00205 * @param handler the ioHandler to be used 00206 * @param complete if true (the default) the enclosing begin/end will 00207 * be also written, otherwise only the data block will be written. 00208 * @return true if write was successful 00209 */ 00210 bool write(ioHandler& handler,const bool complete=true) const { 00211 // ...we need this workaround to cope with another really awful MSVC bug. 00212 return writeMS(handler,complete); 00213 } 00214 # endif 00215 00216 # ifndef _LTI_MSC_6 00217 /** 00218 * read the parameters from the given ioHandler 00219 * @param handler the ioHandler to be used 00220 * @param complete if true (the default) the enclosing begin/end will 00221 * be also written, otherwise only the data block will be written. 00222 * @return true if write was successful 00223 */ 00224 bool read(ioHandler& handler,const bool complete=true) 00225 # else 00226 /** 00227 * this function is required by MSVC only, as a workaround for a 00228 * very awful bug, which exists since MSVC V.4.0, and still by 00229 * V.6.0 with all bugfixes (so called "service packs") remains 00230 * there... This method is also public due to another bug, so please 00231 * NEVER EVER call this method directly: use read() instead 00232 */ 00233 bool readMS(ioHandler& handler,const bool complete=true) 00234 # endif 00235 { 00236 bool b = true; 00237 if (complete) { 00238 b = handler.readBegin(); 00239 } 00240 00241 if (b) { 00242 00243 lti::read(handler,"devFac",devFac); 00244 lti::read(handler,"nbMaxSteps",nbMaxSteps); 00245 lti::read(handler,"variance",variance); 00246 std::string str; 00247 lti::read(handler,"probabilityFromDistanceMode",str); 00248 if (str=="InterClusterDistanceIndependend") { 00249 probabilityFromDistanceMode=InterClusterDistanceIndependend; 00250 } else { 00251 probabilityFromDistanceMode=InterClusterDistanceDependend; 00252 } 00253 } 00254 00255 # ifndef _LTI_MSC_6 00256 // This is the standard C++ code, which MS Visual C++ 6 is not able to 00257 // compile... 00258 b = b && clustering::parameters::read(handler,false); 00259 # else 00260 bool (clustering::parameters::* p_readMS)(ioHandler&,const bool) = 00261 clustering::parameters::readMS; 00262 b = b && (this->*p_readMS)(handler,false); 00263 # endif 00264 00265 if (complete) { 00266 b = b && handler.readEnd(); 00267 } 00268 00269 return b; 00270 } 00271 00272 # ifdef _LTI_MSC_6 00273 bool read(ioHandler& handler,const bool complete=true) { 00274 // ...we need this workaround to cope with another really awful MSVC 00275 // bug. 00276 return readMS(handler,complete); 00277 } 00278 # endif 00279 00280 // ------------------------------------------------ 00281 // the parameters 00282 // ------------------------------------------------ 00283 00284 /** 00285 * Factor that is used to decide if an edge is inconsistent. 00286 * First all edges are searched that lie at most nbMaxSteps away 00287 * from the current node. Then m is the mean of the weights of all 00288 * these edges and sigma is the standard deviation of these edges. 00289 * An Edge is considered to be in consistent if is weight is bigger than 00290 * mean+devFac*sigma. 00291 * Default is 2.0 00292 */ 00293 float devFac; 00294 00295 /** 00296 * The maximum number of edges that can be between the starting point 00297 * and the last edge that is considered when computed inconsistent edges. 00298 * Default is 4. 00299 */ 00300 int nbMaxSteps; 00301 00302 /** 00303 * Variance used to compute a possibility from the distances 00304 * for classification. 00305 * While classification, the membership of feature to all clusters 00306 * is computed. This done by the following: 00307 * Say d is the minimal distance between the feature that is classified 00308 * and one of the clusters. Say also w is the the minimal distance 00309 * between the current cluster the nearest neighboring cluster, 00310 * then the membership is defined either 00311 * \f$ e^{-\frac{d^2}{\sigma^2w^2}} \f$ or 00312 * \f$ e^{-\frac{d^2}{\sigma^2}} \f$ depending on the value of 00313 * probabilityFromDistanceMode 00314 * Default is 0.33333. 00315 */ 00316 double variance; 00317 00318 /** 00319 * The method used to compute a probabilty from the distance between 00320 * the feature that is classified and the clusters. 00321 * Valid valued are: InterClusterDistanceDependend and 00322 * InterClusterDistanceIndependend. 00323 * See parameter variance 00324 */ 00325 eProbabilityFromDistanceTypes probabilityFromDistanceMode; 00326 }; 00327 00328 /** 00329 * default constructor 00330 */ 00331 MSTClustering(); 00332 00333 /** 00334 * copy constructor 00335 * @param other the object to be copied 00336 */ 00337 MSTClustering(const MSTClustering& other); 00338 00339 /** 00340 * destructor 00341 */ 00342 virtual ~MSTClustering(); 00343 00344 /** 00345 * returns the name of this type ("MSTClustering") 00346 */ 00347 virtual const char* getTypeName() const; 00348 00349 /** 00350 * copy data of "other" clustering. 00351 * @param other the clustering to be copied 00352 * @return a reference to this clustering object 00353 */ 00354 MSTClustering& copy(const MSTClustering& other); 00355 00356 /** 00357 * alias for copy member 00358 * @param other the clustering to be copied 00359 * @return a reference to this clustering object 00360 */ 00361 MSTClustering& operator=(const MSTClustering& other); 00362 00363 /** 00364 * returns a pointer to a clone of this clustering. 00365 */ 00366 virtual classifier* clone() const; 00367 00368 /** 00369 * returns used parameters 00370 */ 00371 const parameters& getParameters() const; 00372 00373 /** 00374 * Unsupervised training. The vectors in the <code>input</code> 00375 * matrix will be put into groups according to the training 00376 * algorithm. Additionally, an integer indicating the class each 00377 * point belongs to is returned. <p> By default this method uses 00378 * the other train method train(const dmatrix&) and then 00379 * calls classify(const dvector&) to get the ids for each 00380 * train vector. These ids are then returned. 00381 * @param input the matrix with the input vectors (each row is a training 00382 * vector) 00383 * @param ids vector of class ids for each input point 00384 * @return true if successful, false otherwise. (if false you can check 00385 * the error message with getStatusString()) 00386 */ 00387 virtual bool train(const dmatrix& input, 00388 ivector& ids); 00389 00390 /** 00391 * Unsupervised training. 00392 * The vectors in the <code>input</code> matrix 00393 * will be clustered using each specific method. 00394 * @param input the matrix with the input vectors (each row is a training 00395 * vector) 00396 * @return true if successful, false otherwise. (if false you can check 00397 * the error message with getStatusString()) 00398 */ 00399 virtual bool train(const dmatrix& input); 00400 00401 /** 00402 * Classification. 00403 * Classifies the feature and returns the outputVector with 00404 * the classification result. 00405 * @param feature the %vector to be classified 00406 * @param result the result of the classification 00407 * @return false if an error occurred during classification else true 00408 */ 00409 virtual bool 00410 classify(const dvector& feature, outputVector& result) const; 00411 00412 /** 00413 * read the parameters from the given ioHandler 00414 * @param handler the ioHandler to be used 00415 * @param complete if true (the default) the enclosing begin/end will 00416 * be also read, otherwise only the data block will be read. 00417 * @return true if write was successful 00418 */ 00419 virtual bool write (ioHandler &handler, const bool complete=true) const; 00420 00421 /** 00422 * read the parameters from the given ioHandler 00423 * @param handler the ioHandler to be used 00424 * @param complete if true (the default) the enclosing begin/end will 00425 * be also read, otherwise only the data block will be read. 00426 * @return true if write was successful 00427 */ 00428 virtual bool read (ioHandler &handler, const bool complete=true); 00429 00430 protected: 00431 /** 00432 * collects the weights of all edge starting from node with id b 00433 * until it reaches the given depth. a is used to make sure that 00434 * the function does not run back the way same way he came from. 00435 */ 00436 void collectDists(const int a,const int b, 00437 std::list<std::pair<int,int> >& dists,int depth); 00438 00439 /** 00440 * searchs for clusters in the graph after the inconsistent edges are 00441 * deleted. 00442 */ 00443 void findClusters(const int from,int& curId,int& nextId, 00444 weightedGraph<vector<double>,int,double>::node*& startNode, 00445 vector<int>& ids,std::vector<int>& startNodes); 00446 /** 00447 * The graph that contains the tree and the edges. 00448 */ 00449 weightedGraph<vector<double>,int,double> gr1; 00450 00451 /** 00452 * After the training process the vector contains under each id a 00453 * kdTree with all points that are in the cluster with the id 00454 * corresponding with the position in the vector 00455 */ 00456 std::vector<kdTree<dvector,int,U >* > clusters; 00457 00458 /** 00459 * After the training process this map contains for each cluster 00460 * with the given id the minimal distance to its nearest 00461 * neighboring cluster. 00462 */ 00463 std::map<int,double> minDistances; 00464 00465 /** 00466 * The distantor used computing distances. 00467 */ 00468 U distantor; 00469 00470 00471 }; 00472 } 00473 00474 #include "ltiMSTClustering_template.h" 00475 00476 #endif