LTI-Lib latest version v1.9 - last update 10 Apr 2010

ltiClusteringValidity.h

00001 /*
00002  * Copyright (C) 2002, 2003, 2004, 2005, 2006
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  * 
00005  * This file is part of the LTI-Computer Vision Library (LTI-Lib)
00006  *
00007  * The LTI-Lib is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License (LGPL)
00009  * as published by the Free Software Foundation; either version 2.1 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * The LTI-Lib is distributed in the hope that it will be
00013  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00014  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public 
00018  * License along with the LTI-Lib; see the file LICENSE.  If
00019  * not, write to the Free Software Foundation, Inc., 59 Temple Place -
00020  * Suite 330, Boston, MA 02111-1307, USA.  
00021  */ 
00022 
00023  
00024 /*--------------------------------------------------------------------
00025  * project ....: LTI-Lib: Image Processing and Computer Vision Library
00026  * file .......: ltiClusteringValidity.h
00027  * authors ....: Jens Paustenbach
00028  * organization: LTI, RWTH Aachen
00029  * creation ...: 4.4.2002
00030  * revisions ..: $Id: ltiClusteringValidity.h,v 1.8 2006/02/07 18:14:32 ltilib Exp $
00031  */
00032 
00033 #ifndef _LTI_CLUSTERING_VALIDITY_H_
00034 #define _LTI_CLUSTERING_VALIDITY_H_
00035 
00036 #include "ltiVector.h"
00037 #include "ltiMatrix.h"
00038 #include "ltiL2Distance.h"
00039 #include "ltiFunctor.h"
00040 #include <list>
00041 
00042 namespace lti {
00043   /**
00044    *  Parent class for all clustering validity measures.
00045    *  Clustering validity measures are used to evaluate the quality of a 
00046    *  clustering. The measure can e.a. be used to find the best possible
00047    *  number of clusters in a data set.
00048    *  It provides some distance and diameter measure for clusters.
00049    *  These measures are descriped in IEEE Transaction on systems,man and
00050    *  cybernetics - part B: cybernetics, Vol 28, No.3, June 1998, 301-315
00051    */
00052   // ------------------------------------------------------------------
00053   // clustering Validity
00054   // ------------------------------------------------------------------
00055   class clusteringValidity : public functor {
00056   public:
00057     /**
00058      * default constructor
00059      */
00060     clusteringValidity();
00061 
00062     /**
00063      * default constructor
00064      */
00065     clusteringValidity(const clusteringValidity& other);
00066 
00067     /**
00068      * destructor
00069      */
00070     virtual ~clusteringValidity();
00071 
00072     /**
00073      * returns the name of this type ("clusteringValidity")
00074      */
00075     virtual const char* getTypeName() const;
00076 
00077     /**
00078      * alias for copy member
00079      * @param other the functor to be copied
00080      * @return a reference to this functor object
00081      */
00082     clusteringValidity& operator=(const clusteringValidity& other);
00083 
00084     /**
00085      * copy data of "other" functor.
00086      * @param other the functor to be copied
00087      * @return a reference to this functor object
00088      */
00089     clusteringValidity& copy(const clusteringValidity& other);
00090     
00091     /**
00092      * abstract parant class
00093      * operates on the given %parameter.
00094      * @param clusteredData std::vector<dmatrix> with the source data.
00095      * @param index the clustering validity result
00096      * @param centroids dmatrix with each row representing a centroid
00097      *        of the corresponding distribution in clusteredData
00098      * @return true if apply successful or false otherwise.
00099      */
00100     virtual bool apply(const std::vector<dmatrix>& clusteredData, 
00101                        double& index, const dmatrix& centroids) const =0;
00102 
00103   protected:
00104     /**
00105      * calculates the minimum distances of the points in the given matrices
00106      * and returns it.
00107      */
00108     double getMinimumDistance(const dmatrix& m1,const dmatrix& m2) const;
00109 
00110     /**
00111      * calculates the maximum distance of all the points in the given
00112      * matrices and returns this distance
00113      */
00114     double getMaximumDistance(const dmatrix& m1,const dmatrix& m2) const;
00115 
00116     /** 
00117      * return the distance of the centroids of m1 and m2
00118      */
00119     double getCentroidDistance(const dmatrix& m1,const dmatrix& m2) const;
00120 
00121     /** 
00122      * return the average distance of all the point in m1 and m2
00123      */
00124     double getAverageDistance(const dmatrix& m1,const dmatrix& m2) const;
00125 
00126     /** 
00127      * accumulates the the distances of all points each matrix to matrix
00128      * and divides the sum of these distance through the sum of all points
00129      */
00130     double getAverageInterpointDistance(const dmatrix& m1,
00131                                         const dmatrix& m2) const ;
00132 
00133     /**
00134      * return the maximum distance of all points in m1
00135      */
00136     double getStandardDiameter(const dmatrix& m1) const;
00137 
00138     /** 
00139      * the average distance of all points in m1
00140      */
00141     double getAverageDiameter(const dmatrix& m1) const;
00142 
00143     /** 
00144      * average distance between each point and the centroid of m1
00145      */
00146     double getAverageToCentroidDiameter(const dmatrix& m1) const;
00147       
00148 
00149   };
00150 
00151   ////////////////////////////////////////////////////////////////////////
00152   //////////////////////// D U N N   I N D E X ///////////////////////////
00153   ////////////////////////////////////////////////////////////////////////
00154   /** 
00155    * computes the Dunn Index and its generalisations described in 
00156    * Bezdek, J.C.,Pal,N.R., 1998. "Some new indexes of cluster validity."
00157    * IEEE Transaction on Systems,Man, and cybernetics (Part B) 28, 301-315
00158    * the distance and diameter functions are implemented in the parent class
00159    * so they can be used for other computations of validity indexes.
00160    * <p>To find the best possible clustering look for the maximum value of
00161    * this indices
00162    * <p>Valid values for the measure of the diameters are:
00163    * standard: the largest distance between to points in a cluster
00164    * average: the average distance between all points in the cluster
00165    * centroid: twice the average distance between each point and the centroid
00166    * <p> valid values for the distance measure are
00167    * minimum: the minimal distance between the points in the two clusters
00168    * maximum: the maximum distance between the points in the two clusters
00169    * mean: the average distance between all points in the clusters
00170    * centroids: the distance between the centroids of the clusters
00171    * interpoint: combination of mean and centroids
00172    */
00173   class dunnIndex : public clusteringValidity {
00174   public:
00175       
00176     class parameters : public clusteringValidity::parameters {
00177     public:
00178       /**
00179        * default constructor
00180        */
00181       parameters();
00182          
00183       /**
00184        * copy constructor
00185        * @param other the parameters object to be copied
00186        */
00187       parameters(const parameters& other);
00188          
00189       /**
00190        * destructor
00191        */
00192       ~parameters();
00193          
00194       /**
00195        * returns name of this type
00196        */
00197       const char* getTypeName() const;
00198             
00199       /**
00200        * copy the contents of a parameters object
00201        * @param other the parameters object to be copied
00202        * @return a reference to this parameters object
00203        */
00204       parameters& copy(const parameters& other);
00205             
00206       /**
00207        * copy the contents of a parameters object
00208        * @param other the parameters object to be copied
00209        * @return a reference to this parameters object
00210        */
00211       parameters& operator=(const parameters& other);
00212             
00213             
00214       /**
00215        * returns a pointer to a clone of the parameters
00216        */
00217       virtual functor::parameters* clone() const;
00218             
00219       /**
00220        * write the parameters in the given ioHandler
00221        * @param handler the ioHandler to be used
00222        * @param complete if true (the default) the enclosing begin/end will
00223        * be also written, otherwise only the data block will be written.
00224        * @return true if write was successful
00225        */
00226       virtual bool write(ioHandler& handler,const bool complete=true) const;
00227             
00228       /**
00229        * read the parameters from the given ioHandler
00230        * @param handler the ioHandler to be used
00231        * @param complete if true (the default) the enclosing begin/end will
00232        *      be also written, otherwise only the data block will be written.
00233        * @return true if write was successful
00234        */
00235       virtual bool read(ioHandler& handler,const bool complete=true);
00236             
00237 #     ifdef _LTI_MSC_6     
00238       /**
00239        * this function is required by MSVC only, as a workaround for a
00240        * very awful bug, which exists since MSVC V.4.0, and still by
00241        * V.6.0 with all bugfixes (so called "service packs") remains
00242        * there...  This method is also public due to another bug, so please
00243        * NEVER EVER call this method directly: use read() instead
00244        */
00245       bool readMS(ioHandler& handler,const bool complete=true);
00246 
00247       /**
00248        * this function is required by MSVC only, as a workaround for a
00249        * very awful bug, which exists since MSVC V.4.0, and still by
00250        * V.6.0 with all bugfixes (so called "service packs") remains
00251        * there...  This method is also public due to another bug, so please
00252        * NEVER EVER call this method directly: use write() instead
00253        */
00254       bool writeMS(ioHandler& handler,const bool complete=true) const;
00255 #     endif
00256          
00257       // ------------------------------------------------
00258       // the parameters
00259       // ------------------------------------------------
00260       
00261       /**
00262        * Diameter Measure Types
00263        */
00264       enum eDiameterMeasure {
00265         Standard,   /* use standard diameter */
00266         Average,    /* use average distance of all points as diameter */
00267         Centroid    /* use the average distance of each point to 
00268                        the centroid */
00269       };
00270 
00271       /**
00272        * Distance Measure Types
00273        */
00274       enum eDistanceMeasure {
00275         Minimum=0,  /* use minimum distance between clusters */
00276         Maximum,    /* use maximum distance between clusters */
00277         Mean,       /* use average distance between clusters */
00278         Centroids,  /* use distance of the centroids */
00279         Interpoint  /* use average of all interpoint distances */
00280       };
00281 
00282       /**
00283        * Diameter measure
00284        *
00285        * Default value: Standard
00286        */
00287       eDiameterMeasure diameterMeasure;
00288 
00289       /**
00290        * Distance measure
00291        *
00292        * Default value: Minimum
00293        */
00294       eDistanceMeasure distanceMeasure;
00295     };
00296 
00297     // ----------------------------------------------------------
00298     // Dunn Index 
00299     // ----------------------------------------------------------
00300 
00301     /**
00302      * standard contructor
00303      */
00304     dunnIndex();
00305 
00306     /**
00307      * standard contructor
00308      */
00309     dunnIndex(const dunnIndex& other);
00310 
00311     /**
00312      * destructor
00313      */
00314     virtual ~dunnIndex();
00315 
00316     /**
00317      * returns the name of this type
00318      */
00319     virtual const char* getTypeName() const;
00320 
00321     /**
00322      * calculates the Dunn Index of the clustered Data
00323      * @param clusteredData std::vector<dmatrix> with the source data.
00324      * @param index the clustering validity result
00325      * @param centroids dmatrix with each row representing a centroid
00326      *        of the corresponding distribution in clusteredData
00327      * @return true if apply successful or false otherwise.
00328      */
00329     virtual bool apply(const std::vector<dmatrix>& clusteredData,
00330                        double& index, const dmatrix& centroids) const;
00331     
00332     
00333     const parameters& getParameters() const;
00334 
00335     /**
00336      * copy data of "other" functor.
00337      * @param other the functor to be copied
00338      * @return a reference to this functor object
00339      */
00340     dunnIndex& copy(const dunnIndex& other);
00341 
00342     /**
00343      * returns a pointer to a clone of this functor.
00344      */
00345     virtual functor* clone() const;
00346 
00347     /**
00348      * alias for copy member
00349      * @param other the functor to be copied
00350      * @return a reference to this functor object
00351      */
00352     dunnIndex& operator=(const dunnIndex& other);
00353     
00354   };
00355 
00356   ////////////////////////////////////////////////////////////////////////
00357   /////////////// Modified Hubert Statistic //////////////////////////////
00358   ////////////////////////////////////////////////////////////////////////
00359   /** 
00360    * Calculates the modified Hubert statistic of the given clustering.
00361    *
00362    * When the same data set is tested with different parameters, e.a. with
00363    * different number of clusters, the best possible clustering is shown 
00364    * by a sharp knee in this index.
00365    * 
00366    * <em> This validity measure is not defined for one cluster or if
00367    * the number of clusters is equal the the number of points </em>
00368    */
00369   class modHubertStat : public clusteringValidity {
00370   public:
00371 
00372     /**
00373      * standard contructor
00374      */
00375     modHubertStat();
00376 
00377     /**
00378      * standard contructor
00379      */
00380     modHubertStat(const modHubertStat& other);
00381 
00382     /**
00383      * destructor
00384      */
00385     virtual ~modHubertStat();
00386 
00387     /**
00388      * copy data of "other" functor.
00389      * @param other the functor to be copied
00390      * @return a reference to this functor object
00391      */
00392     modHubertStat& copy(const modHubertStat& other);
00393 
00394     /**
00395      * alias for copy member
00396      * @param other the functor to be copied
00397      * @return a reference to this functor object
00398      */
00399     modHubertStat& operator=(const modHubertStat& other);
00400 
00401     /**
00402      * returns the name of this type
00403      */
00404     virtual const char* getTypeName() const;
00405 
00406     /**
00407      * returns a pointer to a clone of this functor.
00408      */
00409     virtual functor* clone() const;
00410 
00411     /**
00412      * calculates the Dunn Index of the clustered Data
00413      * @param clusteredData std::vector<dmatrix> with the source data.
00414      * @param index the clustering validity result
00415      * @param centroids dmatrix with each row representing a centroid
00416      *        of the corresponding distribution in clusteredData
00417      * @return true if apply successful or false otherwise.
00418      */
00419     virtual bool apply(const std::vector<dmatrix>& clusteredData,
00420                        double& index, const dmatrix& centroids) const;
00421   };
00422 
00423   ////////////////////////////////////////////////////////////////////////
00424   ////////// normalized Modified Hubert Statistic ////////////////////////
00425   ////////////////////////////////////////////////////////////////////////
00426   /** 
00427    * Calculates the normalized version of the modified Hubert statistic 
00428    * The index is between -1 and 1. A value near 1 shows compact well 
00429    * separated clusters.
00430    * When the same data set is tested with different parameters, e.a. with
00431    * different number of clusters, the best possible clustering is shown 
00432    * by a sharp knee in this index.
00433    * <em> This validity measure is not defined, if the number of cluster is
00434    * 1 or equal to the number of points </em>
00435    */
00436   class normModHubertStat : public clusteringValidity {
00437   public:
00438 
00439     /**
00440      * standard contructor
00441      */
00442     normModHubertStat();
00443 
00444     /**
00445      * standard contructor
00446      */
00447     normModHubertStat(const normModHubertStat& other);
00448 
00449     /**
00450      * destructor
00451      */
00452     virtual ~normModHubertStat();
00453 
00454     /**
00455      * copy data of "other" functor.
00456      * @param other the functor to be copied
00457      * @return a reference to this functor object
00458      */
00459     normModHubertStat& copy(const normModHubertStat& other);
00460 
00461     /**
00462      * alias for copy member
00463      * @param other the functor to be copied
00464      * @return a reference to this functor object
00465      */
00466     normModHubertStat& operator=(const normModHubertStat& other);
00467 
00468     /**
00469      * returns the name of this type
00470      */
00471     virtual const char* getTypeName() const;
00472 
00473     /**#
00474      * returns a pointer to a clone of this functor.
00475      */
00476     virtual functor* clone() const;
00477 
00478     /**
00479      * calculates the normalized modified Hubert Statistics 
00480      * of the clustered data.
00481      * @param clusteredData std::vector<dmatrix> with the source data.
00482      * @param index the clustering validity result
00483      * @param centroids dmatrix with each row representing a centroid
00484      *        of the corresponding distribution in clusteredData
00485      * @return true if apply successful or false otherwise.
00486      */
00487     virtual bool apply(const std::vector<dmatrix>& clusteredData,
00488                        double& index, const dmatrix& centroids) const;
00489   };
00490 
00491   ////////////////////////////////////////////////////////////////////////
00492   //////////////////////// Davies Bouldin Index //////////////////////////
00493   ////////////////////////////////////////////////////////////////////////
00494 
00495   /** 
00496    * Calculates the Davies Bouldin Index of the given clustering.
00497    * If different clustering, e.a. with different number of clusters, 
00498    * of the same data set are tested, the best
00499    * will be the clustering with the minimal Davies Bouldin Index.
00500    */
00501   class daviesBouldinIndex : public clusteringValidity {
00502   public:
00503 
00504     /**
00505      * standard contructor
00506      */
00507     daviesBouldinIndex();
00508 
00509     /**
00510      * standard contructor
00511      */
00512     daviesBouldinIndex(const daviesBouldinIndex& other);
00513 
00514     /**
00515      * destructor
00516      */
00517     virtual ~daviesBouldinIndex();
00518 
00519     /**
00520      * copy data of "other" functor.
00521      * @param other the functor to be copied
00522      * @return a reference to this functor object
00523      */
00524     daviesBouldinIndex& copy(const daviesBouldinIndex& other);
00525 
00526     /**
00527      * alias for copy member
00528      * @param other the functor to be copied
00529      * @return a reference to this functor object
00530      */
00531     daviesBouldinIndex& operator=(const daviesBouldinIndex& other);
00532 
00533     /**
00534      * returns the name of this type
00535      */
00536     virtual const char* getTypeName() const;
00537 
00538     /**
00539      * returns a pointer to a clone of this functor.
00540      */
00541     virtual functor* clone() const;
00542 
00543     /**
00544      * calculates the Davies Bouldin Index of the clustered data.
00545      * @param clusteredData std::vector<dmatrix> with the source data.
00546      * @param index the clustering validity result
00547      * @param centroids dmatrix with each row representing a centroid
00548      *        of the corresponding distribution in clusteredData
00549      * @return true if apply successful or false otherwise.
00550      */
00551     virtual bool apply(const std::vector<dmatrix>& clusteredData,
00552                        double& index, const dmatrix& centroids) const;
00553   };
00554 
00555 
00556 }
00557 
00558 #endif

Generated on Sat Apr 10 15:25:14 2010 for LTI-Lib by Doxygen 1.6.1