latest version v1.9 - last update 10 Apr 2010 |
00001 /* 00002 * Copyright (C) 2004, 2005, 2006 00003 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany 00004 * 00005 * This file is part of the LTI-Computer Vision Library (LTI-Lib) 00006 * 00007 * The LTI-Lib is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public License (LGPL) 00009 * as published by the Free Software Foundation; either version 2.1 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * The LTI-Lib is distributed in the hope that it will be 00013 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00014 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with the LTI-Lib; see the file LICENSE. If 00019 * not, write to the Free Software Foundation, Inc., 59 Temple Place - 00020 * Suite 330, Boston, MA 02111-1307, USA. 00021 */ 00022 00023 00024 /*-------------------------------------------------------------------- 00025 * project ....: LTI-Lib: Image Processing and Computer Vision Library 00026 * file .......: ltiCompetitiveAgglomeration.h 00027 * authors ....: Benjamin Winkler 00028 * organization: LTI, RWTH Aachen 00029 * creation ...: 21.1.2004 00030 * revisions ..: $Id: ltiCompetitiveAgglomeration.h,v 1.3 2006/02/07 18:15:12 ltilib Exp $ 00031 */ 00032 00033 #ifndef _LTI_COMPETITIVE_AGGLOMERATION_H_ 00034 #define _LTI_COMPETITIVE_AGGLOMERATION_H_ 00035 00036 #include "ltiVector.h" 00037 #include "ltiCentroidClustering.h" 00038 #include "ltiL2Distance.h" 00039 00040 namespace lti { 00041 /** 00042 * This class implements the centroid clustering algorithm presented in 00043 * "Clustering by Competitive Agglomeration" from Frigui and Krishnapuram, Pattern Recognition, 00044 * Vol. 30, No. 7, pp. 1109-1119, 1997. 00045 * The algorithm is a fuzzy clustering algorithm, which reduces a given partition to an optimal 00046 * number of clusters. Here, the initial partition is generated by first performing a fuzzy-C-Means 00047 * run on the data. 00048 * Note: since the number of clusters will only be decreased by this algorithm, the fuzzy-C-Means 00049 * parameters should be modified such that the number of clusters is chosen to be much larger than 00050 * the expected number of clusters. 00051 */ 00052 class competitiveAgglomeration : public centroidClustering { 00053 public: 00054 /** 00055 * The parameters for the class competitiveAgglomeration 00056 */ 00057 class parameters : public centroidClustering::parameters { 00058 public: 00059 /** 00060 * Default constructor 00061 */ 00062 parameters(); 00063 00064 /** 00065 * Copy constructor 00066 * @param other the parameters object to be copied 00067 */ 00068 parameters(const parameters& other); 00069 00070 /** 00071 * Destructor 00072 */ 00073 ~parameters(); 00074 00075 /** 00076 * Returns name of this type 00077 */ 00078 const char* getTypeName() const; 00079 00080 /** 00081 * Copy the contents of a parameters object 00082 * @param other the parameters object to be copied 00083 * @return a reference to this parameters object 00084 */ 00085 parameters& copy(const parameters& other); 00086 00087 /** 00088 * Copy the contents of a parameters object 00089 * @param other the parameters object to be copied 00090 * @return a reference to this parameters object 00091 */ 00092 parameters& operator=(const parameters& other); 00093 00094 00095 /** 00096 * Returns a pointer to a clone of the parameters 00097 */ 00098 virtual classifier::parameters* clone() const; 00099 00100 /** 00101 * Write the parameters in the given ioHandler 00102 * @param handler the ioHandler to be used 00103 * @param complete if true (the default) the enclosing begin/end will 00104 * be also written, otherwise only the data block will be written. 00105 * @return true if write was successful 00106 */ 00107 virtual bool write(ioHandler& handler,const bool complete=true) const; 00108 00109 /** 00110 * Read the parameters from the given ioHandler 00111 * @param handler the ioHandler to be used 00112 * @param complete if true (the default) the enclosing begin/end will 00113 * be also written, otherwise only the data block will be written. 00114 * @return true if write was successful 00115 */ 00116 virtual bool read(ioHandler& handler,const bool complete=true); 00117 00118 # ifdef _LTI_MSC_6 00119 /** 00120 * This function is required by MSVC only, as a workaround for a 00121 * very awful bug, which exists since MSVC V.4.0, and still by 00122 * V.6.0 with all bugfixes (so called "service packs") remains 00123 * there... This method is also public due to another bug, so please 00124 * NEVER EVER call this method directly: use read() instead 00125 */ 00126 bool readMS(ioHandler& handler,const bool complete=true); 00127 00128 /** 00129 * This function is required by MSVC only, as a workaround for a 00130 * very awful bug, which exists since MSVC V.4.0, and still by 00131 * V.6.0 with all bugfixes (so called "service packs") remains 00132 * there... This method is also public due to another bug, so please 00133 * NEVER EVER call this method directly: use write() instead 00134 */ 00135 bool writeMS(ioHandler& handler,const bool complete=true) const; 00136 # endif 00137 00138 // ------------------------------------------------ 00139 // the parameters 00140 // ------------------------------------------------ 00141 00142 /** 00143 * The cardinality factor defines the importance of merging nearby clusters 00144 * relative to covering the data optimally. Larger values will result in fewer 00145 * clusters, with a value of 0.0 no cluster will be discarded, thus leading to 00146 * the same result as the fuzzy C-Means algorithm. 00147 * Default value is 5.0 00148 */ 00149 double cardinalityFactor; 00150 00151 /** 00152 * As the cardinality factor relies heavily on the data, the distance measure 00153 * and the number of clusters, a decay function is used that starts with the 00154 * given cardinality factor and exponentially decreases the value with each 00155 * iteration: exp(-iteration / timeConstant). This should result in finding 00156 * the optimal number of clusters faster. 00157 * The default time constant is 10.0 00158 */ 00159 double timeConstant; 00160 00161 00162 /** 00163 * The initial number of clusters must be larger than the expected optimal 00164 * number of clusters. 00165 * Default is 20. 00166 */ 00167 int initialNumberOfClusters; 00168 00169 /** 00170 * The number of iterations to be used for fuzzy c-means pre-classification. 00171 * Default: 10. 00172 */ 00173 int initialIterations; 00174 00175 /** 00176 * The cardinality describes the amount of data that is being covered by the cluster. 00177 * If the cardinality of a cluster drops below a given threshold, it will be discarded. 00178 * The default value for this threshold is 5.0 00179 */ 00180 double minimumCardinality; 00181 00182 /** 00183 * Bias the algorithm either towards hard clustering (nearby 1) or 00184 * fuzzy clustering (bigger 1); this parameter must be bigger than 1. 00185 * Default is 2.0 00186 */ 00187 double fuzzifier; 00188 00189 /** 00190 * The maximum number of iterations serves as one of two convergence criteria, the other one 00191 * being convergenceThreshold. 00192 * Default value is 100. 00193 */ 00194 int maxIterations; 00195 00196 /** 00197 * The algorithm converges, when the centroids remain stable. If the sum of the L2 distance 00198 * of the relative movements of the centroids in one iteration is below the convergence threshold, 00199 * the algorithm terminates. 00200 * Default: 0.02 00201 */ 00202 double convergenceThreshold; 00203 00204 }; 00205 00206 /** 00207 * Default constructor 00208 */ 00209 competitiveAgglomeration(); 00210 00211 /** 00212 * Construct a classifier using the given parameters 00213 */ 00214 competitiveAgglomeration(const parameters& par); 00215 00216 /** 00217 * Copy constructor 00218 * @param other the object to be copied 00219 */ 00220 competitiveAgglomeration(const competitiveAgglomeration& other); 00221 00222 /** 00223 * Destructor 00224 */ 00225 virtual ~competitiveAgglomeration(); 00226 00227 /** 00228 * Returns the name of this type ("competitiveAgglomeration") 00229 */ 00230 virtual const char* getTypeName() const; 00231 00232 00233 /** 00234 * Copy data of "other" classifier. 00235 * @param other the classifier to be copied 00236 * @return a reference to this classifier object 00237 */ 00238 competitiveAgglomeration& copy(const competitiveAgglomeration& other); 00239 00240 /** 00241 * Alias for copy member 00242 * @param other the classifier to be copied 00243 * @return a reference to this classifier object 00244 */ 00245 competitiveAgglomeration& operator=(const competitiveAgglomeration& other); 00246 00247 /** 00248 * Returns a pointer to a clone of this classifier. 00249 */ 00250 virtual classifier* clone() const; 00251 00252 /** 00253 * Returns used parameters 00254 */ 00255 const parameters& getParameters() const; 00256 00257 /** 00258 * train clusters from given data. 00259 */ 00260 bool train(const lti::dmatrix& input); 00261 00262 /** 00263 * calls centroidClustering::train 00264 */ 00265 bool train(const lti::dmatrix& input, ivector& ids) { 00266 return centroidClustering::train(input, ids); 00267 } 00268 00269 00270 private: 00271 00272 // fuzzify this value (distance ^ fuzzifier) 00273 inline double fuzzify(const double& distance) const { 00274 return lti::pow(distance, fuzzifier); 00275 } 00276 00277 // update distance matrix 00278 void updateDistance(const dmatrix &trainingData); 00279 00280 // calculate the proportional alpha value, based on the iteration, the time constant, 00281 // the partition and the distance matrix 00282 double calculateAlpha(int iteration, double timeConstant) const; 00283 00284 // update the cardinality of all clusters 00285 void updateCardinality(); 00286 00287 // update the partition matrix 00288 void updateMembership(double cardinalityFactor); 00289 00290 // remove cluster k, adapt all matrices accordingly 00291 void discardCluster(int k); 00292 00293 // calculate centroids from the partition matrix 00294 void updatePrototypes(const dmatrix &trainingData); 00295 00296 // internal data 00297 l2Distance<double> distFunc; 00298 dmatrix distanceMatrix; 00299 dmatrix partitionMatrix; 00300 dvector clusterCardinality; 00301 00302 // for faster access 00303 int numberOfClusters; 00304 int numberOfPoints; 00305 int numberOfDimensions; 00306 double fuzzifier; 00307 }; 00308 } 00309 00310 #endif