LTI-Lib latest version v1.9 - last update 10 Apr 2010

ltiCompetitiveAgglomeration.h

00001 /*
00002  * Copyright (C) 2004, 2005, 2006
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  *
00005  * This file is part of the LTI-Computer Vision Library (LTI-Lib)
00006  *
00007  * The LTI-Lib is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License (LGPL)
00009  * as published by the Free Software Foundation; either version 2.1 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * The LTI-Lib is distributed in the hope that it will be
00013  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00014  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with the LTI-Lib; see the file LICENSE.  If
00019  * not, write to the Free Software Foundation, Inc., 59 Temple Place -
00020  * Suite 330, Boston, MA 02111-1307, USA.
00021  */
00022 
00023 
00024 /*--------------------------------------------------------------------
00025  * project ....: LTI-Lib: Image Processing and Computer Vision Library
00026  * file .......: ltiCompetitiveAgglomeration.h
00027  * authors ....: Benjamin Winkler
00028  * organization: LTI, RWTH Aachen
00029  * creation ...: 21.1.2004
00030  * revisions ..: $Id: ltiCompetitiveAgglomeration.h,v 1.3 2006/02/07 18:15:12 ltilib Exp $
00031  */
00032 
00033 #ifndef _LTI_COMPETITIVE_AGGLOMERATION_H_
00034 #define _LTI_COMPETITIVE_AGGLOMERATION_H_
00035 
00036 #include "ltiVector.h"
00037 #include "ltiCentroidClustering.h"
00038 #include "ltiL2Distance.h"
00039 
00040 namespace lti {
00041   /**
00042    * This class implements the centroid clustering algorithm presented in
00043    * "Clustering by Competitive Agglomeration" from Frigui and Krishnapuram, Pattern Recognition,
00044    * Vol. 30, No. 7, pp. 1109-1119, 1997.
00045    * The algorithm is a fuzzy clustering algorithm, which reduces a given partition to an optimal
00046    * number of clusters. Here, the initial partition is generated by first performing a fuzzy-C-Means
00047    * run on the data.
00048    * Note: since the number of clusters will only be decreased by this algorithm, the fuzzy-C-Means
00049    * parameters should be modified such that the number of clusters is chosen to be much larger than
00050    * the expected number of clusters.
00051    */
00052   class competitiveAgglomeration : public centroidClustering {
00053   public:
00054     /**
00055      * The parameters for the class competitiveAgglomeration
00056      */
00057     class parameters : public centroidClustering::parameters {
00058     public:
00059       /**
00060        * Default constructor
00061        */
00062       parameters();
00063 
00064       /**
00065        * Copy constructor
00066        * @param other the parameters object to be copied
00067        */
00068       parameters(const parameters& other);
00069 
00070       /**
00071        * Destructor
00072        */
00073       ~parameters();
00074 
00075       /**
00076        * Returns name of this type
00077        */
00078       const char* getTypeName() const;
00079 
00080       /**
00081        * Copy the contents of a parameters object
00082        * @param other the parameters object to be copied
00083        * @return a reference to this parameters object
00084        */
00085       parameters& copy(const parameters& other);
00086 
00087       /**
00088        * Copy the contents of a parameters object
00089        * @param other the parameters object to be copied
00090        * @return a reference to this parameters object
00091        */
00092       parameters& operator=(const parameters& other);
00093 
00094 
00095       /**
00096        * Returns a pointer to a clone of the parameters
00097        */
00098       virtual classifier::parameters* clone() const;
00099 
00100       /**
00101        * Write the parameters in the given ioHandler
00102        * @param handler the ioHandler to be used
00103        * @param complete if true (the default) the enclosing begin/end will
00104        *        be also written, otherwise only the data block will be written.
00105        * @return true if write was successful
00106        */
00107       virtual bool write(ioHandler& handler,const bool complete=true) const;
00108 
00109       /**
00110        * Read the parameters from the given ioHandler
00111        * @param handler the ioHandler to be used
00112        * @param complete if true (the default) the enclosing begin/end will
00113        *        be also written, otherwise only the data block will be written.
00114        * @return true if write was successful
00115        */
00116       virtual bool read(ioHandler& handler,const bool complete=true);
00117 
00118 #     ifdef _LTI_MSC_6
00119       /**
00120        * This function is required by MSVC only, as a workaround for a
00121        * very awful bug, which exists since MSVC V.4.0, and still by
00122        * V.6.0 with all bugfixes (so called "service packs") remains
00123        * there...  This method is also public due to another bug, so please
00124        * NEVER EVER call this method directly: use read() instead
00125        */
00126       bool readMS(ioHandler& handler,const bool complete=true);
00127 
00128       /**
00129        * This function is required by MSVC only, as a workaround for a
00130        * very awful bug, which exists since MSVC V.4.0, and still by
00131        * V.6.0 with all bugfixes (so called "service packs") remains
00132        * there...  This method is also public due to another bug, so please
00133        * NEVER EVER call this method directly: use write() instead
00134        */
00135       bool writeMS(ioHandler& handler,const bool complete=true) const;
00136 #     endif
00137 
00138       // ------------------------------------------------
00139       // the parameters
00140       // ------------------------------------------------
00141 
00142       /**
00143        * The cardinality factor defines the importance of merging nearby clusters
00144        * relative to covering the data optimally. Larger values will result in fewer
00145        * clusters, with a value of 0.0 no cluster will be discarded, thus leading to
00146        * the same result as the fuzzy C-Means algorithm.
00147        * Default value is 5.0
00148        */
00149       double cardinalityFactor;
00150 
00151       /**
00152        * As the cardinality factor relies heavily on the data, the distance measure
00153        * and the number of clusters, a decay function is used that starts with the
00154        * given cardinality factor and exponentially decreases the value with each
00155        * iteration: exp(-iteration / timeConstant). This should result in finding
00156        * the optimal number of clusters faster.
00157        * The default time constant is 10.0
00158        */
00159       double timeConstant;
00160 
00161 
00162       /**
00163        * The initial number of clusters must be larger than the expected optimal
00164        * number of clusters.
00165        * Default is 20.
00166        */
00167       int initialNumberOfClusters;
00168 
00169       /**
00170        * The number of iterations to be used for fuzzy c-means pre-classification.
00171        * Default: 10.
00172        */
00173       int initialIterations;
00174 
00175       /**
00176        * The cardinality describes the amount of data that is being covered by the cluster.
00177        * If the cardinality of a cluster drops below a given threshold, it will be discarded.
00178        * The default value for this threshold is 5.0
00179        */
00180       double minimumCardinality;
00181 
00182       /**
00183        * Bias the algorithm either towards hard clustering (nearby 1) or
00184        * fuzzy clustering (bigger 1); this parameter must be bigger than 1.
00185        * Default is 2.0
00186        */
00187       double fuzzifier;
00188 
00189       /**
00190        * The maximum number of iterations serves as one of two convergence criteria, the other one
00191        * being convergenceThreshold.
00192        * Default value is 100.
00193        */
00194       int maxIterations;
00195 
00196       /**
00197        * The algorithm converges, when the centroids remain stable. If the sum of the L2 distance
00198        * of the relative movements of the centroids in one iteration is below the convergence threshold,
00199        * the algorithm terminates.
00200        * Default: 0.02
00201        */
00202       double convergenceThreshold;
00203 
00204     };
00205 
00206     /**
00207      * Default constructor
00208      */
00209     competitiveAgglomeration();
00210 
00211     /**
00212      * Construct a classifier using the given parameters
00213      */
00214     competitiveAgglomeration(const parameters& par);
00215 
00216     /**
00217      * Copy constructor
00218      * @param other the object to be copied
00219      */
00220     competitiveAgglomeration(const competitiveAgglomeration& other);
00221 
00222     /**
00223      * Destructor
00224      */
00225     virtual ~competitiveAgglomeration();
00226 
00227     /**
00228      * Returns the name of this type ("competitiveAgglomeration")
00229      */
00230     virtual const char* getTypeName() const;
00231     
00232 
00233     /**
00234      * Copy data of "other" classifier.
00235      * @param other the classifier to be copied
00236      * @return a reference to this classifier object
00237      */
00238     competitiveAgglomeration& copy(const competitiveAgglomeration& other);
00239 
00240     /**
00241      * Alias for copy member
00242      * @param other the classifier to be copied
00243      * @return a reference to this classifier object
00244      */
00245     competitiveAgglomeration& operator=(const competitiveAgglomeration& other);
00246 
00247     /**
00248      * Returns a pointer to a clone of this classifier.
00249      */
00250     virtual classifier* clone() const;
00251 
00252     /**
00253      * Returns used parameters
00254      */
00255     const parameters& getParameters() const;
00256 
00257     /**
00258      * train clusters from given data.
00259      */
00260     bool train(const lti::dmatrix& input);
00261 
00262     /**
00263      * calls centroidClustering::train
00264      */
00265     bool train(const lti::dmatrix& input, ivector& ids) {
00266       return centroidClustering::train(input, ids);
00267     }
00268 
00269 
00270   private:
00271 
00272     // fuzzify this value (distance ^ fuzzifier)
00273     inline double fuzzify(const double& distance) const {
00274       return lti::pow(distance, fuzzifier);
00275     }
00276 
00277     // update distance matrix
00278     void updateDistance(const dmatrix &trainingData);
00279 
00280     // calculate the proportional alpha value, based on the iteration, the time constant,
00281     // the partition and the distance matrix
00282     double calculateAlpha(int iteration, double timeConstant) const;
00283 
00284     // update the cardinality of all clusters
00285     void updateCardinality();
00286 
00287     // update the partition matrix
00288     void updateMembership(double cardinalityFactor);
00289 
00290     // remove cluster k, adapt all matrices accordingly
00291     void discardCluster(int k);
00292 
00293     // calculate centroids from the partition matrix
00294     void updatePrototypes(const dmatrix &trainingData);
00295 
00296     // internal data
00297     l2Distance<double> distFunc;
00298     dmatrix distanceMatrix;
00299     dmatrix partitionMatrix;
00300     dvector clusterCardinality;
00301 
00302     // for faster access
00303     int numberOfClusters;
00304     int numberOfPoints;
00305     int numberOfDimensions;
00306     double fuzzifier;
00307   };
00308 }
00309 
00310 #endif

Generated on Sat Apr 10 15:25:16 2010 for LTI-Lib by Doxygen 1.6.1