latest version v1.9 - last update 10 Apr 2010 |
00001 /* 00002 * Copyright (C) 2002, 2003, 2004, 2005, 2006 00003 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany 00004 * 00005 * This file is part of the LTI-Computer Vision Library (LTI-Lib) 00006 * 00007 * The LTI-Lib is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public License (LGPL) 00009 * as published by the Free Software Foundation; either version 2.1 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * The LTI-Lib is distributed in the hope that it will be 00013 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00014 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with the LTI-Lib; see the file LICENSE. If 00019 * not, write to the Free Software Foundation, Inc., 59 Temple Place - 00020 * Suite 330, Boston, MA 02111-1307, USA. 00021 */ 00022 00023 00024 /*-------------------------------------------------------------------- 00025 * project ....: LTI-Lib: Image Processing and Computer Vision Library 00026 * file .......: ltiClusteringValidity.h 00027 * authors ....: Jens Paustenbach 00028 * organization: LTI, RWTH Aachen 00029 * creation ...: 4.4.2002 00030 * revisions ..: $Id: ltiClusteringValidity.h,v 1.8 2006/02/07 18:14:32 ltilib Exp $ 00031 */ 00032 00033 #ifndef _LTI_CLUSTERING_VALIDITY_H_ 00034 #define _LTI_CLUSTERING_VALIDITY_H_ 00035 00036 #include "ltiVector.h" 00037 #include "ltiMatrix.h" 00038 #include "ltiL2Distance.h" 00039 #include "ltiFunctor.h" 00040 #include <list> 00041 00042 namespace lti { 00043 /** 00044 * Parent class for all clustering validity measures. 00045 * Clustering validity measures are used to evaluate the quality of a 00046 * clustering. The measure can e.a. be used to find the best possible 00047 * number of clusters in a data set. 00048 * It provides some distance and diameter measure for clusters. 00049 * These measures are descriped in IEEE Transaction on systems,man and 00050 * cybernetics - part B: cybernetics, Vol 28, No.3, June 1998, 301-315 00051 */ 00052 // ------------------------------------------------------------------ 00053 // clustering Validity 00054 // ------------------------------------------------------------------ 00055 class clusteringValidity : public functor { 00056 public: 00057 /** 00058 * default constructor 00059 */ 00060 clusteringValidity(); 00061 00062 /** 00063 * default constructor 00064 */ 00065 clusteringValidity(const clusteringValidity& other); 00066 00067 /** 00068 * destructor 00069 */ 00070 virtual ~clusteringValidity(); 00071 00072 /** 00073 * returns the name of this type ("clusteringValidity") 00074 */ 00075 virtual const char* getTypeName() const; 00076 00077 /** 00078 * alias for copy member 00079 * @param other the functor to be copied 00080 * @return a reference to this functor object 00081 */ 00082 clusteringValidity& operator=(const clusteringValidity& other); 00083 00084 /** 00085 * copy data of "other" functor. 00086 * @param other the functor to be copied 00087 * @return a reference to this functor object 00088 */ 00089 clusteringValidity& copy(const clusteringValidity& other); 00090 00091 /** 00092 * abstract parant class 00093 * operates on the given %parameter. 00094 * @param clusteredData std::vector<dmatrix> with the source data. 00095 * @param index the clustering validity result 00096 * @param centroids dmatrix with each row representing a centroid 00097 * of the corresponding distribution in clusteredData 00098 * @return true if apply successful or false otherwise. 00099 */ 00100 virtual bool apply(const std::vector<dmatrix>& clusteredData, 00101 double& index, const dmatrix& centroids) const =0; 00102 00103 protected: 00104 /** 00105 * calculates the minimum distances of the points in the given matrices 00106 * and returns it. 00107 */ 00108 double getMinimumDistance(const dmatrix& m1,const dmatrix& m2) const; 00109 00110 /** 00111 * calculates the maximum distance of all the points in the given 00112 * matrices and returns this distance 00113 */ 00114 double getMaximumDistance(const dmatrix& m1,const dmatrix& m2) const; 00115 00116 /** 00117 * return the distance of the centroids of m1 and m2 00118 */ 00119 double getCentroidDistance(const dmatrix& m1,const dmatrix& m2) const; 00120 00121 /** 00122 * return the average distance of all the point in m1 and m2 00123 */ 00124 double getAverageDistance(const dmatrix& m1,const dmatrix& m2) const; 00125 00126 /** 00127 * accumulates the the distances of all points each matrix to matrix 00128 * and divides the sum of these distance through the sum of all points 00129 */ 00130 double getAverageInterpointDistance(const dmatrix& m1, 00131 const dmatrix& m2) const ; 00132 00133 /** 00134 * return the maximum distance of all points in m1 00135 */ 00136 double getStandardDiameter(const dmatrix& m1) const; 00137 00138 /** 00139 * the average distance of all points in m1 00140 */ 00141 double getAverageDiameter(const dmatrix& m1) const; 00142 00143 /** 00144 * average distance between each point and the centroid of m1 00145 */ 00146 double getAverageToCentroidDiameter(const dmatrix& m1) const; 00147 00148 00149 }; 00150 00151 //////////////////////////////////////////////////////////////////////// 00152 //////////////////////// D U N N I N D E X /////////////////////////// 00153 //////////////////////////////////////////////////////////////////////// 00154 /** 00155 * computes the Dunn Index and its generalisations described in 00156 * Bezdek, J.C.,Pal,N.R., 1998. "Some new indexes of cluster validity." 00157 * IEEE Transaction on Systems,Man, and cybernetics (Part B) 28, 301-315 00158 * the distance and diameter functions are implemented in the parent class 00159 * so they can be used for other computations of validity indexes. 00160 * <p>To find the best possible clustering look for the maximum value of 00161 * this indices 00162 * <p>Valid values for the measure of the diameters are: 00163 * standard: the largest distance between to points in a cluster 00164 * average: the average distance between all points in the cluster 00165 * centroid: twice the average distance between each point and the centroid 00166 * <p> valid values for the distance measure are 00167 * minimum: the minimal distance between the points in the two clusters 00168 * maximum: the maximum distance between the points in the two clusters 00169 * mean: the average distance between all points in the clusters 00170 * centroids: the distance between the centroids of the clusters 00171 * interpoint: combination of mean and centroids 00172 */ 00173 class dunnIndex : public clusteringValidity { 00174 public: 00175 00176 class parameters : public clusteringValidity::parameters { 00177 public: 00178 /** 00179 * default constructor 00180 */ 00181 parameters(); 00182 00183 /** 00184 * copy constructor 00185 * @param other the parameters object to be copied 00186 */ 00187 parameters(const parameters& other); 00188 00189 /** 00190 * destructor 00191 */ 00192 ~parameters(); 00193 00194 /** 00195 * returns name of this type 00196 */ 00197 const char* getTypeName() const; 00198 00199 /** 00200 * copy the contents of a parameters object 00201 * @param other the parameters object to be copied 00202 * @return a reference to this parameters object 00203 */ 00204 parameters& copy(const parameters& other); 00205 00206 /** 00207 * copy the contents of a parameters object 00208 * @param other the parameters object to be copied 00209 * @return a reference to this parameters object 00210 */ 00211 parameters& operator=(const parameters& other); 00212 00213 00214 /** 00215 * returns a pointer to a clone of the parameters 00216 */ 00217 virtual functor::parameters* clone() const; 00218 00219 /** 00220 * write the parameters in the given ioHandler 00221 * @param handler the ioHandler to be used 00222 * @param complete if true (the default) the enclosing begin/end will 00223 * be also written, otherwise only the data block will be written. 00224 * @return true if write was successful 00225 */ 00226 virtual bool write(ioHandler& handler,const bool complete=true) const; 00227 00228 /** 00229 * read the parameters from the given ioHandler 00230 * @param handler the ioHandler to be used 00231 * @param complete if true (the default) the enclosing begin/end will 00232 * be also written, otherwise only the data block will be written. 00233 * @return true if write was successful 00234 */ 00235 virtual bool read(ioHandler& handler,const bool complete=true); 00236 00237 # ifdef _LTI_MSC_6 00238 /** 00239 * this function is required by MSVC only, as a workaround for a 00240 * very awful bug, which exists since MSVC V.4.0, and still by 00241 * V.6.0 with all bugfixes (so called "service packs") remains 00242 * there... This method is also public due to another bug, so please 00243 * NEVER EVER call this method directly: use read() instead 00244 */ 00245 bool readMS(ioHandler& handler,const bool complete=true); 00246 00247 /** 00248 * this function is required by MSVC only, as a workaround for a 00249 * very awful bug, which exists since MSVC V.4.0, and still by 00250 * V.6.0 with all bugfixes (so called "service packs") remains 00251 * there... This method is also public due to another bug, so please 00252 * NEVER EVER call this method directly: use write() instead 00253 */ 00254 bool writeMS(ioHandler& handler,const bool complete=true) const; 00255 # endif 00256 00257 // ------------------------------------------------ 00258 // the parameters 00259 // ------------------------------------------------ 00260 00261 /** 00262 * Diameter Measure Types 00263 */ 00264 enum eDiameterMeasure { 00265 Standard, /* use standard diameter */ 00266 Average, /* use average distance of all points as diameter */ 00267 Centroid /* use the average distance of each point to 00268 the centroid */ 00269 }; 00270 00271 /** 00272 * Distance Measure Types 00273 */ 00274 enum eDistanceMeasure { 00275 Minimum=0, /* use minimum distance between clusters */ 00276 Maximum, /* use maximum distance between clusters */ 00277 Mean, /* use average distance between clusters */ 00278 Centroids, /* use distance of the centroids */ 00279 Interpoint /* use average of all interpoint distances */ 00280 }; 00281 00282 /** 00283 * Diameter measure 00284 * 00285 * Default value: Standard 00286 */ 00287 eDiameterMeasure diameterMeasure; 00288 00289 /** 00290 * Distance measure 00291 * 00292 * Default value: Minimum 00293 */ 00294 eDistanceMeasure distanceMeasure; 00295 }; 00296 00297 // ---------------------------------------------------------- 00298 // Dunn Index 00299 // ---------------------------------------------------------- 00300 00301 /** 00302 * standard contructor 00303 */ 00304 dunnIndex(); 00305 00306 /** 00307 * standard contructor 00308 */ 00309 dunnIndex(const dunnIndex& other); 00310 00311 /** 00312 * destructor 00313 */ 00314 virtual ~dunnIndex(); 00315 00316 /** 00317 * returns the name of this type 00318 */ 00319 virtual const char* getTypeName() const; 00320 00321 /** 00322 * calculates the Dunn Index of the clustered Data 00323 * @param clusteredData std::vector<dmatrix> with the source data. 00324 * @param index the clustering validity result 00325 * @param centroids dmatrix with each row representing a centroid 00326 * of the corresponding distribution in clusteredData 00327 * @return true if apply successful or false otherwise. 00328 */ 00329 virtual bool apply(const std::vector<dmatrix>& clusteredData, 00330 double& index, const dmatrix& centroids) const; 00331 00332 00333 const parameters& getParameters() const; 00334 00335 /** 00336 * copy data of "other" functor. 00337 * @param other the functor to be copied 00338 * @return a reference to this functor object 00339 */ 00340 dunnIndex& copy(const dunnIndex& other); 00341 00342 /** 00343 * returns a pointer to a clone of this functor. 00344 */ 00345 virtual functor* clone() const; 00346 00347 /** 00348 * alias for copy member 00349 * @param other the functor to be copied 00350 * @return a reference to this functor object 00351 */ 00352 dunnIndex& operator=(const dunnIndex& other); 00353 00354 }; 00355 00356 //////////////////////////////////////////////////////////////////////// 00357 /////////////// Modified Hubert Statistic ////////////////////////////// 00358 //////////////////////////////////////////////////////////////////////// 00359 /** 00360 * Calculates the modified Hubert statistic of the given clustering. 00361 * 00362 * When the same data set is tested with different parameters, e.a. with 00363 * different number of clusters, the best possible clustering is shown 00364 * by a sharp knee in this index. 00365 * 00366 * <em> This validity measure is not defined for one cluster or if 00367 * the number of clusters is equal the the number of points </em> 00368 */ 00369 class modHubertStat : public clusteringValidity { 00370 public: 00371 00372 /** 00373 * standard contructor 00374 */ 00375 modHubertStat(); 00376 00377 /** 00378 * standard contructor 00379 */ 00380 modHubertStat(const modHubertStat& other); 00381 00382 /** 00383 * destructor 00384 */ 00385 virtual ~modHubertStat(); 00386 00387 /** 00388 * copy data of "other" functor. 00389 * @param other the functor to be copied 00390 * @return a reference to this functor object 00391 */ 00392 modHubertStat& copy(const modHubertStat& other); 00393 00394 /** 00395 * alias for copy member 00396 * @param other the functor to be copied 00397 * @return a reference to this functor object 00398 */ 00399 modHubertStat& operator=(const modHubertStat& other); 00400 00401 /** 00402 * returns the name of this type 00403 */ 00404 virtual const char* getTypeName() const; 00405 00406 /** 00407 * returns a pointer to a clone of this functor. 00408 */ 00409 virtual functor* clone() const; 00410 00411 /** 00412 * calculates the Dunn Index of the clustered Data 00413 * @param clusteredData std::vector<dmatrix> with the source data. 00414 * @param index the clustering validity result 00415 * @param centroids dmatrix with each row representing a centroid 00416 * of the corresponding distribution in clusteredData 00417 * @return true if apply successful or false otherwise. 00418 */ 00419 virtual bool apply(const std::vector<dmatrix>& clusteredData, 00420 double& index, const dmatrix& centroids) const; 00421 }; 00422 00423 //////////////////////////////////////////////////////////////////////// 00424 ////////// normalized Modified Hubert Statistic //////////////////////// 00425 //////////////////////////////////////////////////////////////////////// 00426 /** 00427 * Calculates the normalized version of the modified Hubert statistic 00428 * The index is between -1 and 1. A value near 1 shows compact well 00429 * separated clusters. 00430 * When the same data set is tested with different parameters, e.a. with 00431 * different number of clusters, the best possible clustering is shown 00432 * by a sharp knee in this index. 00433 * <em> This validity measure is not defined, if the number of cluster is 00434 * 1 or equal to the number of points </em> 00435 */ 00436 class normModHubertStat : public clusteringValidity { 00437 public: 00438 00439 /** 00440 * standard contructor 00441 */ 00442 normModHubertStat(); 00443 00444 /** 00445 * standard contructor 00446 */ 00447 normModHubertStat(const normModHubertStat& other); 00448 00449 /** 00450 * destructor 00451 */ 00452 virtual ~normModHubertStat(); 00453 00454 /** 00455 * copy data of "other" functor. 00456 * @param other the functor to be copied 00457 * @return a reference to this functor object 00458 */ 00459 normModHubertStat& copy(const normModHubertStat& other); 00460 00461 /** 00462 * alias for copy member 00463 * @param other the functor to be copied 00464 * @return a reference to this functor object 00465 */ 00466 normModHubertStat& operator=(const normModHubertStat& other); 00467 00468 /** 00469 * returns the name of this type 00470 */ 00471 virtual const char* getTypeName() const; 00472 00473 /**# 00474 * returns a pointer to a clone of this functor. 00475 */ 00476 virtual functor* clone() const; 00477 00478 /** 00479 * calculates the normalized modified Hubert Statistics 00480 * of the clustered data. 00481 * @param clusteredData std::vector<dmatrix> with the source data. 00482 * @param index the clustering validity result 00483 * @param centroids dmatrix with each row representing a centroid 00484 * of the corresponding distribution in clusteredData 00485 * @return true if apply successful or false otherwise. 00486 */ 00487 virtual bool apply(const std::vector<dmatrix>& clusteredData, 00488 double& index, const dmatrix& centroids) const; 00489 }; 00490 00491 //////////////////////////////////////////////////////////////////////// 00492 //////////////////////// Davies Bouldin Index ////////////////////////// 00493 //////////////////////////////////////////////////////////////////////// 00494 00495 /** 00496 * Calculates the Davies Bouldin Index of the given clustering. 00497 * If different clustering, e.a. with different number of clusters, 00498 * of the same data set are tested, the best 00499 * will be the clustering with the minimal Davies Bouldin Index. 00500 */ 00501 class daviesBouldinIndex : public clusteringValidity { 00502 public: 00503 00504 /** 00505 * standard contructor 00506 */ 00507 daviesBouldinIndex(); 00508 00509 /** 00510 * standard contructor 00511 */ 00512 daviesBouldinIndex(const daviesBouldinIndex& other); 00513 00514 /** 00515 * destructor 00516 */ 00517 virtual ~daviesBouldinIndex(); 00518 00519 /** 00520 * copy data of "other" functor. 00521 * @param other the functor to be copied 00522 * @return a reference to this functor object 00523 */ 00524 daviesBouldinIndex& copy(const daviesBouldinIndex& other); 00525 00526 /** 00527 * alias for copy member 00528 * @param other the functor to be copied 00529 * @return a reference to this functor object 00530 */ 00531 daviesBouldinIndex& operator=(const daviesBouldinIndex& other); 00532 00533 /** 00534 * returns the name of this type 00535 */ 00536 virtual const char* getTypeName() const; 00537 00538 /** 00539 * returns a pointer to a clone of this functor. 00540 */ 00541 virtual functor* clone() const; 00542 00543 /** 00544 * calculates the Davies Bouldin Index of the clustered data. 00545 * @param clusteredData std::vector<dmatrix> with the source data. 00546 * @param index the clustering validity result 00547 * @param centroids dmatrix with each row representing a centroid 00548 * of the corresponding distribution in clusteredData 00549 * @return true if apply successful or false otherwise. 00550 */ 00551 virtual bool apply(const std::vector<dmatrix>& clusteredData, 00552 double& index, const dmatrix& centroids) const; 00553 }; 00554 00555 00556 } 00557 00558 #endif