latest version v1.9 - last update 10 Apr 2010 |
00001 /* 00002 * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 00003 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany 00004 * 00005 * This file is part of the LTI-Computer Vision Library (LTI-Lib) 00006 * 00007 * The LTI-Lib is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public License (LGPL) 00009 * as published by the Free Software Foundation; either version 2.1 of 00010 * the License, or (at your option) any later version. 00011 * 00012 * The LTI-Lib is distributed in the hope that it will be 00013 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 00014 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with the LTI-Lib; see the file LICENSE. If 00019 * not, write to the Free Software Foundation, Inc., 59 Temple Place - 00020 * Suite 330, Boston, MA 02111-1307, USA. 00021 */ 00022 00023 00024 /*-------------------------------------------------------------------- 00025 * project ....: LTI-Lib: Image Processing and Computer Vision Library 00026 * file .......: ltiParetoFront.h 00027 * authors ....: Pablo Alvarado 00028 * organization: LTI, RWTH Aachen 00029 * creation ...: 25.11.2003 00030 * revisions ..: $Id: ltiParetoFront.h,v 1.6 2006/03/10 18:52:05 alvarado Exp $ 00031 */ 00032 00033 #ifndef _LTI_PARETO_FRONT_H_ 00034 #define _LTI_PARETO_FRONT_H_ 00035 00036 #include <list> 00037 #include <vector> 00038 #include <fstream> 00039 #include "ltiVector.h" 00040 #include "ltiMatrix.h" 00041 #include "ltiContinuousRandDist.h" 00042 #include "ltiFunctor.h" 00043 #include "ltiProgressInfo.h" 00044 #include "ltiLispStreamHandler.h" 00045 00046 namespace lti { 00047 /** 00048 * Pareto Front computation with PESA 00049 * 00050 * The Pareto Front functor provides an evaluation mechanism for algorithms, 00051 * in which the qualitiy of the algorithm cannot be restricted to one single 00052 * scalar measure. It is adecuate to compare different parameterizations for 00053 * one or several algorithms, which are supposed to solve the same task. 00054 * 00055 * This class generates a "front" in a multidimensional fitness 00056 * space, which represents a trade-off between several fitness 00057 * values, that each derived class explicitelly implements. 00058 * 00059 * The algorithm used here to generate the front is called PESA (Pareto 00060 * Envelope-based Selection Algorithm), and it is described in: 00061 * 00062 * David. W. Corne, Joshua D. Knowles and Martin J. Oates 00063 * The Pareto Envelope-based Selection Algorithm for Multiobjective 00064 * Optimization. In: Proceedings of the International Conference on Parallel 00065 * Problem Solving from Nature (PPSN VI). (2000) 839-848 00066 * 00067 * A good introduction to the application of this problem for the 00068 * evaluation of image processing algorithms, specifically to the evaluation 00069 * of segmentation is given in: 00070 * 00071 * Mark Everingham, Henk Muller and Barry Thomas, Evaluating Image 00072 * Segmentation Algorithms using the Pareto Front. In Proceedings of 00073 * the 7th European Conference on Computer Vision (ECCV2002), Part 00074 * IV (LNCS 2353), pages 34-48. Springer, June 2002. 00075 * 00076 * http://www.cs.bris.ac.uk/Tools/Reports/Abstracts/2002-everingham-1.html 00077 * 00078 * This is the parent class for all evaluation algorithms using PESA. Its 00079 * kern administrates the evolutionary part of the algorithm and the derived 00080 * classes provide the adaptions necessary for each application in 00081 * particular, which include the conversion between functor parameters and a 00082 * binary chain chromosome representation, the computation of the fitness 00083 * measures, etc. 00084 * 00085 * The current class also provides some simple tools that simplify the 00086 * construction of evaluation functors. 00087 * 00088 * There is a deviation from the original paper which is more suitable for 00089 * the evaluation of algorithms used here. The original algorithms 00090 * separates the fitness space into regular hyperboxes and computes the 00091 * density of individuals in each hyperbox to decide which elements will be 00092 * used for crossover or mutation (those boxes with the smallest density) 00093 * and which elements need to be removed (those with the highest density). 00094 * The computation of the density is in the present functor much more 00095 * computationally expensive, but its computation is nevertheless negligible 00096 * in comparison to the computation of the fitness measures for an 00097 * algorithm: 00098 * 00099 * The current algorithm keeps track of the bounding box of the fitness 00100 * space for which individuals have been created. This bounding box is used 00101 * to determine the size of a Gaussian kernel, which will be used on each 00102 * individual to compute the influence of all other individuals in the 00103 * front. This way, the selection is not strongly bounded to the selection 00104 * of a partitioning of the fitness space. 00105 * 00106 * For an example of an evaluation class see lti::locationSearchEvaluation. 00107 * 00108 * \section Progress information 00109 * 00110 * To monitor the progress of the evolutionary process this functor makes use 00111 * of classes derived from lti::progressInfo, where the detail level of 00112 * the substeps can be used to show the information required. 00113 * - Detail Level 0: Only the steps are shown, with the front size and 00114 * the numbers of new individuals inserted. 00115 * - Detail Level 1: A line for each individual in the internal 00116 * population indicating the start of the evaluation is 00117 * shown 00118 * - Detail Level 2: For each individual, the fitness vector computed 00119 * is also displayed. 00120 * - Detail Level 3: At the end of each evaluation cicle (all internal 00121 * population already evaluated) show 00122 * - New non-dominated child (NNDC) 00123 * - Recently dead individual (RDIn) 00124 * - Individual removed by overpopulation (HDRI) 00125 */ 00126 class paretoFront : public functor { 00127 public: 00128 /** 00129 * The parameters for the class paretoFront. 00130 * 00131 * These are the general parameters for the Pareto Front computation. 00132 * For each special evaluation class, more attributes are usually added. 00133 */ 00134 class parameters : public functor::parameters { 00135 public: 00136 /** 00137 * Default constructor 00138 */ 00139 parameters(); 00140 00141 /** 00142 * Copy constructor 00143 * @param other the parameters object to be copied 00144 */ 00145 parameters(const parameters& other); 00146 00147 /** 00148 * Destructor 00149 */ 00150 ~parameters(); 00151 00152 /** 00153 * Returns name of this type 00154 */ 00155 const char* getTypeName() const; 00156 00157 /** 00158 * Copy the contents of a parameters object 00159 * @param other the parameters object to be copied 00160 * @return a reference to this parameters object 00161 */ 00162 parameters& copy(const parameters& other); 00163 00164 /** 00165 * Copy the contents of a parameters object 00166 * @param other the parameters object to be copied 00167 * @return a reference to this parameters object 00168 */ 00169 parameters& operator=(const parameters& other); 00170 00171 00172 /** 00173 * Returns a pointer to a clone of the parameters 00174 */ 00175 virtual functor::parameters* clone() const; 00176 00177 /** 00178 * Write the parameters in the given ioHandler 00179 * @param handler the ioHandler to be used 00180 * @param complete if true (the default) the enclosing begin/end will 00181 * be also written, otherwise only the data block will be written. 00182 * @return true if write was successful 00183 */ 00184 virtual bool write(ioHandler& handler,const bool& complete=true) const; 00185 00186 /** 00187 * Read the parameters from the given ioHandler 00188 * @param handler the ioHandler to be used 00189 * @param complete if true (the default) the enclosing begin/end will 00190 * be also written, otherwise only the data block will be written. 00191 * @return true if write was successful 00192 */ 00193 virtual bool read(ioHandler& handler,const bool& complete=true); 00194 00195 # ifdef _LTI_MSC_6 00196 /** 00197 * This function is required by MSVC only, as a workaround for a 00198 * very awful bug, which exists since MSVC V.4.0, and still by 00199 * V.6.0 with all bugfixes (so called "service packs") remains 00200 * there... This method is also public due to another bug, so please 00201 * NEVER EVER call this method directly: use read() instead 00202 */ 00203 bool readMS(ioHandler& handler,const bool& complete=true); 00204 00205 /** 00206 * This function is required by MSVC only, as a workaround for a 00207 * very awful bug, which exists since MSVC V.4.0, and still by 00208 * V.6.0 with all bugfixes (so called "service packs") remains 00209 * there... This method is also public due to another bug, so please 00210 * NEVER EVER call this method directly: use write() instead 00211 */ 00212 bool writeMS(ioHandler& handler,const bool& complete=true) const; 00213 # endif 00214 00215 // ------------------------------------------------ 00216 // the parameters 00217 // ------------------------------------------------ 00218 00219 /** 00220 * Crossover probability. 00221 * 00222 * The PESA algorithms does a uniform crossover with this 00223 * probability Pc. This means, with probability Pc a crossover 00224 * between two parents will be done, otherwise only mutation 00225 * will be done. 00226 * 00227 * Default value: 0.7 00228 */ 00229 double crossoverProbability; 00230 00231 /** 00232 * Initial Bit-flip mutation probability. 00233 * 00234 * This value is usually set to 1/L, where L is the size of a 00235 * chromosome, i.e. the bit-length size given to the binary parameter 00236 * representation. (see paretoFront::phenotypeToChromosome). 00237 * 00238 * If negative, the value used will be |mutationRate|/L. 00239 * If positive, the value will be used "as is". 00240 * 00241 * Note that if positive, it only makes sense to have values between 0 00242 * and 1. 00243 * 00244 * This value has to be greater or equal finalMutationRate. 00245 * 00246 * Default value: -1 (i.e. 1/L will be used) 00247 */ 00248 double initialMutationRate; 00249 00250 /** 00251 * Final Bit-flip mutation probability. 00252 * 00253 * This value is usually set to 1/L, where L is the size of a 00254 * chromosome, i.e. the bit-length size given to the binary parameter 00255 * representation. (see paretoFront::phenotypeToChromosome). 00256 * 00257 * If negative, the value used will be |mutationRate|/L. 00258 * If positive, the value will be used "as is". 00259 * 00260 * Note that if positive, it only makes sense to have values between 0 00261 * and 1. 00262 * 00263 * This value has to be smaller or equal initialMutationRate. 00264 * 00265 * Default value: -1 (i.e. 1/L will be used) 00266 */ 00267 double finalMutationRate; 00268 00269 /** 00270 * Mutation Rate Decay Value 00271 * 00272 * It is possible to begin the evolution with a higher mutation rate 00273 * than in a "stable" evolution. This supports a more random search a 00274 * the beginning, where nothing really good has been found. After a few 00275 * steps can be however desirable to slowly reduce the mutation rate 00276 * into a more normal value. 00277 * 00278 * The ecuation used for the real mutation rate is: 00279 * (initialMutationRate-finalMutationRate)*exp(-i/d) with "i" the 00280 * iteration number and "d" this decay rate value. 00281 * 00282 * The smaller this value, the faster the mutation rate converges to 00283 * its final value. 00284 * 00285 * This value must be strictly positive (never zero). If you want a 00286 * "traditional" PESA, just set the initial and final mutation rates 00287 * with the same value. 00288 * 00289 * Default value: 33.38 (i.e. after 100 steps only 5\% of the 00290 * (final-initial) interval remains) 00291 */ 00292 double mutationDecayRate; 00293 00294 /** 00295 * Size of elements that constitute the Pareto Front. 00296 * 00297 * Default value: 100 00298 */ 00299 int externalPopulationSize; 00300 00301 /** 00302 * Internal population size 00303 * 00304 * Size of elements produced by each iteration through cross 00305 * over or mutation as candidates for the front. 00306 * 00307 * Default value: 10 00308 */ 00309 int internalPopulationSize; 00310 00311 /** 00312 * Dimensionality of the space analyzed by the Pareto Front. 00313 * 00314 * Note that this is unrelated with the parameter-space, which usually 00315 * has many more dimensions that the fitness space. 00316 * 00317 * Usual values are 2 or 3, since more dimensions are very difficult to 00318 * visualize. 00319 * 00320 * Default value: 2 00321 */ 00322 int fitnessSpaceDimensionality; 00323 00324 /** 00325 * Number of iterations. 00326 * 00327 * The process of generating an internal population and then assign the 00328 * best candidates to the Pareto Front is repeated a number of times 00329 * specified by this parameter. 00330 * 00331 * Note that the total number of evaluations for the algorithms will 00332 * be approximatelly this factor times internalPopulationSize. If 00333 * you really want at least externalPopulationSize elements in the 00334 * Pareto front, you need to provide enough iterations to allow that, 00335 * which should be considerably greater than 00336 * externalPopulationSize/internalPopulationSize, since not all 00337 * generated members are added to the pareto front. 00338 * 00339 * Default value: 1000 00340 */ 00341 int numOfIterations; 00342 00343 /** 00344 * Log all evaluated individuals. 00345 * 00346 * Sometimes, for documentation or debug purposes, you will want to know 00347 * all created individuals, even the ones not belonging to the pareto 00348 * front. Since they are usually not required, and they demand some 00349 * resources, it is left to you if you want to keep track of them or not. 00350 * 00351 * Set this parameter to true, if you want to store all generated and 00352 * evaluated individuals, of false, if you want to save the space and 00353 * time required to remember them. 00354 * 00355 * Default value: false 00356 */ 00357 bool logAllEvaluations; 00358 00359 /** 00360 * Fitness space partitioning. 00361 * 00362 * The choice which individual(s) in the Pareto front should be chosen 00363 * for crossover or mutation is taken on a fitness-space density measure. 00364 * Elements will be removed from the dense locations, since there are 00365 * enough prototypes for those places, and for the generation of new 00366 * ones candidates are taken from the low-density regions. 00367 * 00368 * The bounding box for the fitness space will be computed 00369 * automatically as new elements are generated. 00370 * To determine the density at each location, a Gaussian 00371 * kernel will be used. Its covariance matrix is assumed diagonal, where 00372 * each dimension will have as std. deviation a sixth of the length 00373 * obtaind dividing the interval with the given factor. 00374 * 00375 * In the original PESA paper a fixed grid was used, but this has 00376 * limitations in the reachable precision. 00377 * 00378 * Default value: 32 00379 */ 00380 int fitnessSpacePartition; 00381 00382 /** 00383 * Sort result in scanning order. 00384 * 00385 * If true, the individuals of the front will be sorted in ascending 00386 * order of their multidimensional fitness. 00387 * 00388 * In principle, this sorting has no semantical effects about the 00389 * overall fitness of an individual, i.e. an individual later in the 00390 * list is not necessarily better than another one with a smaller index. 00391 * This sorting is more oriented towards drawing tasks for the Pareto 00392 * front. 00393 * 00394 * According to the fitness scanning ordering, and individual A greater 00395 * than an individual B if 00396 * 00397 * \code 00398 * (A[n-1] > B[n-1]) or 00399 * ((A[n-1] == A[n-1]) and ( (A[n-2] > A[n-2]) or 00400 * ((A[n-2] == A[n-2]) and (A[n-3] > A[n-3])) 00401 * ... )) 00402 * \endcode 00403 * 00404 * This is the same ordering employed for lti::tpoint<T> 00405 * 00406 * Sorting will use the STL methods to sort efficiently the result. 00407 * However, it is optional in case you don't care how the individuals 00408 * are sorted. 00409 * 00410 * Default value: false 00411 */ 00412 bool sortResult; 00413 00414 /** 00415 * @name Log options 00416 */ 00417 //@{ 00418 /** 00419 * Activate log. 00420 * 00421 * If true, every new individual that is inserted to the front will be 00422 * logged in the given file. Later on, you can use a special apply to 00423 * continue the analysis of a broken progress. 00424 * 00425 * Default value: false 00426 */ 00427 bool logFront; 00428 00429 /** 00430 * Log Filename. 00431 * 00432 * Filename used for the log of patterns. The data will be written in 00433 * an ASCII format using a lti::lispStreamHandler. It will contain for 00434 * each individual the chromosome binary representation and the computed 00435 * fitness. At the beginning it will save all parameters necessary to 00436 * bring this functor to a compatible state. 00437 * 00438 * Default value: "pareto.log" 00439 */ 00440 std::string logFilename; 00441 //@} 00442 }; 00443 00444 /** 00445 * Default constructor 00446 */ 00447 paretoFront(); 00448 00449 /** 00450 * Construct a functor using the given parameters 00451 */ 00452 paretoFront(const parameters& par); 00453 00454 /** 00455 * Copy constructor 00456 * @param other the object to be copied 00457 */ 00458 paretoFront(const paretoFront& other); 00459 00460 /** 00461 * Destructor 00462 */ 00463 virtual ~paretoFront(); 00464 00465 /** 00466 * Returns the name of this type ("paretoFront") 00467 */ 00468 virtual const char* getTypeName() const; 00469 00470 /** 00471 * Compute the Pareto Front 00472 * 00473 * Since the evaluation function can get as input any kind of information, 00474 * it is left to the derived classes to provide alternative way of doing 00475 * this: other apply methods or use() methods to indicate the set of data 00476 * to be used. 00477 * 00478 * The Pareto Front will be represent by a matrix of size \e m x 00479 * \e n with \e m parameters::externalPopulationSize and \e n 00480 * fitnessSpaceDimensionality. 00481 * 00482 * @param front matrix containing for each row the multidimensional 00483 * fitness value. 00484 * 00485 * @return true if apply successful or false otherwise. 00486 */ 00487 virtual bool apply(matrix<double>& front); 00488 00489 /** 00490 * Compute the Pareto Front and the corresponding parameter objects 00491 * for each non-dominated point. 00492 * 00493 * Since the evaluation function can get as input any kind of information, 00494 * it is left to the derived classes to provide alternative ways of doing 00495 * this: other apply methods or use() methods to indicate the data set 00496 * to be used in the algorithm evaluation. 00497 * 00498 * The Pareto Front will be represented by a matrix of size \e m x 00499 * \e n with \e m parameters::externalPopulationSize and \e n 00500 * fitnessSpaceDimensionality. 00501 * 00502 * For each row in this matrix (an individual), the corresponding 00503 * parameter setting (a phenotype) is returned. Since different 00504 * algorithms usually use different parameters, this can only be 00505 * done with dynamically allocated objects. You need to take care of 00506 * them, but this class provides an utility trash() method, which deletes 00507 * them for you if you want to. 00508 * 00509 * @param front matrix containing for each row the multidimensional 00510 * fitness value. 00511 * @param phenotypes the parameters for each fitness value. 00512 * 00513 * @return true if apply successful or false otherwise. 00514 */ 00515 virtual bool apply(matrix<double>& front, 00516 std::vector<functor::parameters*>& phenotypes); 00517 00518 00519 /** 00520 * Compute the Pareto Front and the corresponding parameter objects 00521 * for each non-dominated point. 00522 * 00523 * This is a special method which reassumes a broken analysis, gaining 00524 * the lost information from a previously generated file. That file 00525 * can be generated activating in the parameters the option logFront and 00526 * indicating there the name for the file. 00527 * 00528 * This method will work only if the parameters logFront and logFilename 00529 * are properly set (logFront in true and logFilename with the proper 00530 * filename). It is not named "apply" for the simple reason that the 00531 * parameters will be changed to those stored in the log-file itself. 00532 * 00533 * Since the evaluation function can get as input any kind of information, 00534 * it is left to the derived classes to provide alternative ways of doing 00535 * this: other apply()-methods or use()-methods to indicate the data set 00536 * to be used in the algorithm evaluation. 00537 * 00538 * The Pareto Front will be represented by a matrix of size \e m x 00539 * \e n with \e m parameters::externalPopulationSize and \e n 00540 * fitnessSpaceDimensionality. 00541 * 00542 * For each row in this matrix (an individual), the corresponding 00543 * parameter setting (a phenotype) is returned. Since different 00544 * algorithms usually use different parameters, this can only be done with 00545 * dynamically allocated objects. You need to take care of them, but this 00546 * class provides an handy trash() method, which deletes them for you if 00547 * you want to. If you don't remove them properly you will have a huge 00548 * memory-leak! Therefore, we recomend the use of trash(). 00549 * 00550 * @param front matrix containing for each row the multidimensional 00551 * fitness value. 00552 * @param phenotypes the parameters for each fitness value. 00553 * 00554 * @return true if apply successful or false otherwise. 00555 */ 00556 virtual bool resume(matrix<double>& front, 00557 std::vector<functor::parameters*>& phenotypes); 00558 00559 00560 /** 00561 * @name Additional Information 00562 */ 00563 //@{ 00564 /** 00565 * Fitness space bounding box. 00566 * 00567 * After determining the Pareto Front, you usually want to know the 00568 * bounding box of the fitness space that was analyzed. This for 00569 * visualization purposes among others. 00570 * 00571 * After the apply() methods you can get the bounding box calling this 00572 * one. 00573 * 00574 * This will always be a matrix 2 x parameters::fitnessSpaceDimensionality, 00575 * where the first row contains the minima for each dimension and the 00576 * second row the maxima. 00577 */ 00578 void getAnalyzedBox(matrix<double>& bbox) const; 00579 00580 /** 00581 * Dominated individuals. 00582 * 00583 * This method returns all points in the fitness space that were 00584 * analyzed while constructing the feature space. For this method 00585 * to return something meaningful, you need to set in the 00586 * parameters the attribute logAllEvaluations to \c true. 00587 * 00588 * The resulting matrix will have the 00589 * size \e n x parameters::fitnessSpaceDimensionality, where \e n is 00590 * the number of dominated individuals analyzed in the process. 00591 */ 00592 void getDominatedIndividuals(matrix<double>& dindiv) const; 00593 00594 /** 00595 * Get data from log 00596 * 00597 * If a log file is generated, usually you cannot read the used 00598 * parameterization. With this method you will get from the log file the 00599 * list of parameters and their corresponding fitness values, as if you 00600 * had used the corresponding apply method. 00601 * 00602 * The parameters of the current functor will change without invalidating 00603 * the reference. Therefore this method is not constant. 00604 */ 00605 bool getDataFromLog(const std::string& logFile, 00606 matrix<double>& front, 00607 std::vector<functor::parameters*>& phenotypes); 00608 00609 //@} 00610 00611 /** 00612 * Copy data of "other" functor. 00613 * @param other the functor to be copied 00614 * @return a reference to this functor object 00615 */ 00616 paretoFront& copy(const paretoFront& other); 00617 00618 /** 00619 * Alias for copy member 00620 * @param other the functor to be copied 00621 * @return a reference to this functor object 00622 */ 00623 paretoFront& operator=(const paretoFront& other); 00624 00625 /** 00626 * Returns a pointer to a clone of this functor. 00627 */ 00628 virtual functor* clone() const = 0; 00629 00630 /** 00631 * Returns used parameters 00632 */ 00633 const parameters& getParameters() const; 00634 00635 /** 00636 * Type used to represent chromosomes 00637 */ 00638 typedef std::vector<bool> chromosome; 00639 00640 /** 00641 * Delete all parameter objects in the given vector 00642 */ 00643 bool trash(std::vector<functor::parameters*> phenotypes) const; 00644 00645 00646 /** 00647 * @name Progress Info 00648 * 00649 * Methods used to plug-in a progress visualization object. 00650 */ 00651 //@{ 00652 /** 00653 * set a progress %object 00654 * 00655 * A clone of the given %object will be generated. 00656 */ 00657 void setProgressObject(const progressInfo& progBox); 00658 00659 /** 00660 * remove the active progress %object 00661 */ 00662 void removeProgressObject(); 00663 00664 /** 00665 * return true if a valid progressInfo %object has already been set 00666 */ 00667 bool validProgressObject() const; 00668 00669 /** 00670 * Return true if a valid progressInfo %object has already been set and 00671 * its detail level is greater or equal the given value. 00672 */ 00673 bool validProgressObject(const int detailLevel) const; 00674 00675 /** 00676 * get progress %object 00677 */ 00678 progressInfo& getProgressObject(); 00679 00680 /** 00681 * get progress %object 00682 */ 00683 const progressInfo& getProgressObject() const; 00684 00685 //@} 00686 00687 00688 00689 /** 00690 * @name Public methods to be reimplemented 00691 * 00692 * Following methods need to be reimplemented to evaluate specific 00693 * algorithms. 00694 */ 00695 //@{ 00696 00697 /** 00698 * Convert a binary-chain representation of a chromosome to a valid 00699 * parameter object. 00700 * 00701 * There are some tools to convert standard types into some binary chains, 00702 * which can be used by all derived classes: 00703 * - binToInt() 00704 * - binToUInt() 00705 * - binToDouble() 00706 */ 00707 virtual bool chromosomeToPhenotype(const chromosome& genotype, 00708 functor::parameters& phenotype) const=0; 00709 00710 /** 00711 * Return a fresh allocated parameters for the evaluated functor, which is 00712 * equivalent to the given genotype. 00713 * 00714 * There are some tools to convert binary chains into standard types: 00715 * - intToBin() 00716 * - uintToBin() 00717 * - doubleToBin() 00718 */ 00719 virtual functor::parameters* 00720 chromosomeToPhenotype(const chromosome& genotype) const = 0; 00721 00722 /** 00723 * Convert a valid parameters object (phenotype) into binary-chain 00724 * representation of a chromosome. 00725 */ 00726 virtual bool phenotypeToChromosome(const functor::parameters& phenotype, 00727 chromosome& genotype) const=0; 00728 00729 /** 00730 * Return the length in bits for a chromosome. 00731 * 00732 * This method needs to be reimplemented, in order to get some 00733 * default implementations to work. 00734 */ 00735 virtual int getChromosomeSize() const = 0; 00736 00737 /** 00738 * Evaluate Chromosome 00739 * 00740 * This method is one of the most important ones for the pareto evaluation. 00741 * Its task is to produce a multidimensional fitness measure for a given 00742 * chromosome. 00743 * 00744 * It returns true if the evaluation was successful, of false if the 00745 * phenotype represents some invalid parameterization. It is highly 00746 * recomended that the mutation and crossover methods are reimplemented to 00747 * avoid invalid parameterizations. 00748 * 00749 * There are mainly two types of fitness measures that can be 00750 * analyzed with a functor of this kind: empirical goodness and 00751 * empirical discrepancy (Zhang). The empirical goodness computes some 00752 * measure using exclusively the test data, without requiring any ground 00753 * truth. The empirical discrepancy assumes the existency of ground truth 00754 * and provides as measure some distance between the result of an algorithm 00755 * and the ground truth. Each class derived from paretoFront should 00756 * specify clearly which kind of fitness measures it provides. 00757 * 00758 */ 00759 virtual bool evaluateChromosome(const chromosome& individual, 00760 dvector& fitness) = 0; 00761 00762 //@} 00763 00764 /** 00765 * @name Conversion Tools 00766 * 00767 * Converting between phenotypes and binary chains representing 00768 * chromosomes can become a tedious task. These tools should facilitate 00769 * the process. 00770 */ 00771 //@{ 00772 /** 00773 * Convert binary chain into an integer representation. 00774 * 00775 * The lower indices in the chain represent the MSB of the number. 00776 * 00777 * @param chain the binary chain representing a chromosome 00778 * @param startBit index of the bit, at which the integer begins 00779 * @param bitLength number of bits used for this integer (never greater 00780 * than 32). 00781 * @param result integer value read from the chain 00782 * @return the next valid index in the chain, chain.size() if it was 00783 * completely read, or -1 if the request goes beyond the 00784 * size of the chain. 00785 */ 00786 int binToInt(const chromosome& chain, 00787 const int startBit, 00788 const int bitLength, 00789 int32& result) const; 00790 00791 /** 00792 * Convert binary chain into an integer representation. 00793 * 00794 * The lower indices in the chain represent the MSB of the number. 00795 * 00796 * If the given value range is higher than the range representable with 00797 * the given number of bits, only the lowLimit will be considered. If 00798 * the read number has a higher range than the high-low one, then the 00799 * modulo operation will be applied to keep the values in range. 00800 * 00801 * @param chain the binary chain representing a chromosome 00802 * @param startBit index of the bit, at which the integer begins 00803 * @param bitLength number of bits used for this integer (never greater 00804 * than 32). 00805 * @param lowLimit lowest value (inclusive) that the stored variable 00806 * can take. 00807 * @param highLimit highest value (inclusive) that the stored variable 00808 * can take. 00809 * @param result integer value read from the chain 00810 * @return the next valid index in the chain, chain.size() if it was 00811 * completelly read, or -1 if the request goes beyond the 00812 * size of the chain. 00813 */ 00814 int binToInt(const chromosome& chain, 00815 const int startBit, 00816 const int bitLength, 00817 const int lowLimit, 00818 const int highLimit, 00819 int32& result) const; 00820 00821 00822 /** 00823 * Convert binary chain into an unsigned integer representation. 00824 * 00825 * The lower indices in the chain represent the MSB of the number. 00826 * 00827 * @param chain the binary chain representing a chromosome 00828 * @param startBit index of the bit, at which the integer begins 00829 * @param bitLength number of bits used for this integer (never greater 00830 * than 32). 00831 * @param result integer value read from the chain 00832 * @return the next valid index in the chain, chain.size() if it was 00833 * completelly read, or -1 if the request goes beyond the 00834 * size of the chain. 00835 */ 00836 int binToUInt(const chromosome& chain, 00837 const int startBit, 00838 const int bitLength, 00839 uint32& result) const; 00840 00841 /** 00842 * Convert binary chain into an double floating point representation. 00843 * 00844 * The binarization of floating point values for "genetic" manipulation 00845 * can easily result in non-sense values. Therefore here a specified 00846 * value range is quantized in the desired number of bits between 0 and 00847 * 31. 00848 * 00849 * @param chain the binary chain representing a chromosome 00850 * @param startBit index of the bit, at which the integer begins. 00851 * @param bitLength number of bits used for this double number 00852 * (never greater or equal than 32). 00853 * @param lowLimit lowest value (inclusive) that the stored variable 00854 * can take. 00855 * @param highLimit highest value (inclusive) that the stored variable 00856 * can take. 00857 * @param result double value read from the chain 00858 * @return the next valid index in the chain, chain.size() if it was 00859 * completelly read, or -1 if the request goes beyond the 00860 * size of the chain. 00861 */ 00862 int binToDouble(const chromosome& chain, 00863 const int startBit, 00864 const int bitLength, 00865 const double& lowLimit, 00866 const double& highLimit, 00867 double& result) const; 00868 00869 /** 00870 * Convert integer value in a binary change. 00871 * 00872 * @param value the value to be stored. 00873 * @param startBit starting index in the chain, where the representation 00874 * has to be stored. 00875 * @param bitLength number of bits used to represent the number. If it 00876 * is not possible, then -1 will be returned. 00877 * @param chain binary change with the resulting representation. 00878 * @return the next valid index in the chain, where further data can 00879 * be inserted. If there is not enough space, -2 will be returned 00880 */ 00881 int intToBin(const int value, 00882 const int startBit, 00883 const int bitLength, 00884 chromosome& chain) const; 00885 00886 /** 00887 * Convert integer value in a binary change. 00888 * 00889 * @param value the value to be stored. 00890 * @param startBit starting index in the chain, where the representation 00891 * has to be stored. 00892 * @param bitLength number of bits used to represent the number. If it 00893 * is not possible, then -1 will be returned. 00894 * @param chain binary change with the resulting representation. 00895 * @param lowLimit lowest value (inclusive) that the stored variable 00896 * can take. 00897 * @param highLimit highest value (inclusive) that the stored variable 00898 * can take. 00899 * @return the next valid index in the chain, where further data can 00900 * be inserted. If there is not enough space, -2 will be returned 00901 */ 00902 int intToBin(const int value, 00903 const int startBit, 00904 const int bitLength, 00905 const int lowLimit, 00906 const int highLimit, 00907 chromosome& chain) const; 00908 00909 /** 00910 * Convert integer value in a binary change. 00911 * 00912 * @param value the value to be stored. 00913 * @param startBit starting index in the chain, where the representation 00914 * has to be stored. 00915 * @param bitLength number of bits used to represent the number. If it 00916 * is not possible, then -1 will be returned. 00917 * @param chain binary change with the resulting representation. 00918 * @return the next valid index in the chain, where further data can 00919 * be inserted. If there is not enough space, -2 will be returned 00920 */ 00921 int uintToBin(const uint32 value, 00922 const int startBit, 00923 const int bitLength, 00924 chromosome& chain) const; 00925 00926 /** 00927 * Convert double value in a binary change. 00928 * 00929 * @param value value to be quantized. 00930 * @param startBit starting index in the chain, where the representation 00931 * has to be stored. 00932 * @param bitLength number of bits used to represent the number. If it 00933 * is not possible, then -1 will be returned. 00934 * @param lowLimit lowest value (inclusive) that the stored variable 00935 * can take. 00936 * @param highLimit highest value (inclusive) that the stored variable 00937 * can take. 00938 * @param chain binary change with the resulting representation. 00939 */ 00940 int doubleToBin(const double& value, 00941 const int startBit, 00942 const int bitLength, 00943 const double& lowLimit, 00944 const double& highLimit, 00945 chromosome& chain) const; 00946 00947 //@} 00948 00949 00950 protected: 00951 /** 00952 * Returns used parameters 00953 */ 00954 parameters& getRWParameters(); 00955 00956 /** 00957 * Structure characterizing an individual 00958 */ 00959 class individual { 00960 public: 00961 /** 00962 * Constructor 00963 */ 00964 individual() : fitness(),genotype(),squeezeFactor(0.0) {} 00965 00966 /** 00967 * An individual is here "smaller" than another one if 00968 * its squeezeFactor is smaller. This is required to sort 00969 * the individuals after their squeeze factor, which simplifies 00970 * getting the smallest or greatest elements. 00971 */ 00972 inline bool operator<(const individual& other) const { 00973 return (squeezeFactor < other.squeezeFactor); 00974 } 00975 00976 /** 00977 * Fitness of the individual 00978 */ 00979 dvector fitness; 00980 00981 /** 00982 * Chromosome. 00983 * 00984 * You can get the phenotype anytime with cromosomeToPhenotype 00985 */ 00986 chromosome genotype; 00987 00988 /** 00989 * Factor computed to determine which candidates should be taken 00990 * for mutation or crossover. Dense locations in the fitness space 00991 * get a high squeeze factor. 00992 */ 00993 double squeezeFactor; 00994 }; 00995 00996 /** 00997 * Initialize the internal population. 00998 * 00999 * You usually don't want to reimplement this method, but 01000 * randomIndividual(). However, you can reimplemented it if you 01001 * need more that the standard generation of 01002 * parameters::internalPopulationSize random individuals. 01003 * 01004 * @return true if successful, false otherwise. 01005 */ 01006 virtual bool initInternalPopulation(std::vector<individual>& data); 01007 01008 /** 01009 * @name Methods to be reimplemented 01010 */ 01011 //@{ 01012 01013 /** 01014 * Generate a random individual. 01015 * 01016 * You usually will need to reimplement this method to ensure that 01017 * the generated random individuals have a valid phenotype, i.e. that 01018 * the chromosome binary representation in "genotype" has an equivalent 01019 * parameter object for the class you are using. 01020 * 01021 * Return true if successful, false otherwise. 01022 */ 01023 virtual bool randomIndividual(chromosome& genotype); 01024 01025 /** 01026 * Mutate the given chromosome. 01027 * 01028 * This should be reimplemented to ensure that the mutation is a 01029 * valid phenotype. 01030 * 01031 * The default implementation flips the bits with the probability 01032 * given in the parameters. 01033 */ 01034 virtual bool mutate(const chromosome& parent, 01035 chromosome& mutant); 01036 01037 /** 01038 * Crossover between two chromosomes. 01039 * 01040 * This should be reimplemented to ensure that the crossover produces a 01041 * valid phenotype. 01042 * 01043 * The default implementation does a so called uniform crossover, in which 01044 * each pair of corresponding bits are exchanged with a probability of 01045 * 0.5, followed by the mutation indicated in the original PESA paper. 01046 */ 01047 virtual bool crossover(const chromosome& parent1, 01048 const chromosome& parent2, 01049 chromosome& child); 01050 01051 //@} 01052 01053 /** 01054 * Get a random value between 0 and 1 01055 */ 01056 inline double random() const { 01057 return crdist.draw(); 01058 }; 01059 01060 private: 01061 /** 01062 * @name PESA Algorithm 01063 */ 01064 //@{ 01065 /** 01066 * The PESA Algorithm 01067 * 01068 * Computes the Pareto front, which will be return as list of individuals 01069 * in PE. 01070 */ 01071 bool pesa(std::vector<individual>& PE,const bool initFromLog=false); 01072 01073 /** 01074 * Insert non-dominated member from PI to PE 01075 * 01076 * Return the number of elements of PI that were inserted in PE. 01077 */ 01078 int insert(std::vector<individual>& PI, 01079 std::vector<individual>& PE); 01080 01081 /** 01082 * Insert non-dominated member into PE 01083 */ 01084 bool insert(individual& genotype, 01085 std::vector<individual>& PE); 01086 01087 /** 01088 * Returns a random individual in the given population, which has 01089 * been selected because it had a smaller squeeze factor in a binary 01090 * tournament. 01091 */ 01092 int binaryTournament(const std::vector<individual>& PE) const; 01093 01094 /** 01095 * Return true if a>b (a dominates b) after the definition used in the 01096 * Pareto literature: 01097 * 01098 * a>b <=> for all i a[i]>=b[i] and it exists one i such that a[i]>b[i] 01099 * 01100 * The arguments a and b represent here multidimensional fitness values. 01101 */ 01102 bool dominate(const dvector& a, 01103 const dvector& b) const; 01104 01105 /** 01106 * Shadow for the parameter mutation rate, fixed to contain only 01107 * values between 0 and 1 01108 */ 01109 double mutationRate; 01110 //@} 01111 01112 /** 01113 * LUT-based method for computing g(x) = exp(-x^2). 01114 * The value given is x and not x^2 01115 * 01116 * Only values between 0 and 3 will produce something, all the rest 01117 * produce zero as output 01118 */ 01119 static const double* expLUT; 01120 01121 /** 01122 * Initialize expLUT 01123 */ 01124 bool initExpLUT(); 01125 01126 /** 01127 * An efficient way to compute g(x)=exp(-x^2) 01128 */ 01129 inline double exp2(const double& x) const; 01130 01131 /** 01132 * Compute the fitness distance between the given two fitness points 01133 */ 01134 inline double fitnessDistance(const dvector& a, 01135 const dvector& b) const; 01136 01137 /** 01138 * Log all evaluation 01139 */ 01140 bool logEvaluations; 01141 01142 /** 01143 * All individuals not belonging to the pareto front are somehow dead! 01144 * 01145 * This will be used only if logEvaluations is true 01146 */ 01147 std::list<individual> deadIndividuals; 01148 01149 /** 01150 * Functor used everywhere to generate values between 0 and 1. 01151 */ 01152 continuousRandomDistribution crdist; 01153 01154 /** 01155 * Bounding box. 01156 * 01157 * The size of this matrix will be: 01158 * 2 x parameters::fitnessSpaceDimensionality 01159 */ 01160 dmatrix bbox; 01161 01162 /** 01163 * Sigmas 01164 * 01165 * The fitness space grid size will be used to compute the std. deviation 01166 * per each axis. 01167 */ 01168 dvector sigmas; 01169 01170 /** 01171 * Initialize log. 01172 * 01173 * This method reinitializes the log. It writes the functor parameters 01174 * and internal configuration. 01175 */ 01176 bool initLog(); 01177 01178 /** 01179 * Make an entry for the individual in the log file (if desired). 01180 * 01181 * @return true if the logging is activated, or false if no log was 01182 * desired. 01183 */ 01184 bool logEntry(const individual& ind,const bool markDead=false); 01185 01186 /** 01187 * Initialize the bounding box 01188 */ 01189 void initBoundingBox(dmatrix& boundingBox) const; 01190 01191 /** 01192 * Update bounding box considering the given fitness space point. 01193 * 01194 * @return true if there was a change in the bounding box, false if 01195 * given point was already within the bounding box. 01196 */ 01197 bool updateBoundingBox(const dvector& pnt, 01198 dmatrix& boundingBox) const; 01199 01200 /** 01201 * Update fitness space subdivision. 01202 * 01203 * This initializes the sigmas based on the bounding box contents. 01204 */ 01205 void updateFitnessSpaceSubdivision(); 01206 01207 /** 01208 * Update density factors. 01209 * 01210 * Recompute all squeeze factors for the individuals in the 01211 * external population 01212 */ 01213 void updateDensityFactors(std::vector<individual>& PE); 01214 01215 /** 01216 * Convert a chromosome into a string, to be saved in the log file 01217 */ 01218 void chromosomeToString(const chromosome& genotype,std::string& str) const; 01219 01220 /** 01221 * Convert a string into a chromosome into a string, to be loaded from 01222 * the log file 01223 */ 01224 void stringToChromosome(const std::string& str, 01225 chromosome& genotype) const; 01226 01227 /** 01228 * current progress %object 01229 */ 01230 progressInfo* progressBox; 01231 01232 /** 01233 * Class used to compare individuals in "scanning order" 01234 */ 01235 struct scanLess; 01236 01237 /** 01238 * Output stream used to write the log 01239 */ 01240 std::ofstream* logOut; 01241 01242 /** 01243 * Lisp-Stream-Handler used for log output 01244 */ 01245 lispStreamHandler olsh; 01246 01247 /** 01248 * Copied from the parameters 01249 */ 01250 bool logFront; 01251 01252 /** 01253 * Get data from log 01254 * 01255 * This method reads the log file and create the corresponding data. 01256 * Since usually the logs are broken (the user breaks the execution of 01257 * a long computing process), this method needs to cope with broken 01258 * files. 01259 * 01260 * @param logFile name of the file with the log 01261 * @param params parameters as written in the log file 01262 * @param data all data found in the log file. 01263 * @param boundingBox bounding box of the data 01264 * @return true if successful. 01265 */ 01266 bool getDataFromLog(const std::string& logFile, 01267 parameters& params, 01268 std::vector<individual>& data, 01269 dmatrix& boundingBox) const; 01270 01271 01272 private: 01273 /** 01274 * Return the gray code of the given number 01275 */ 01276 inline uint32 grayCode(const uint32 i) const; 01277 01278 /** 01279 * Return the integer value corresponding to the given gray code 01280 */ 01281 inline uint32 iGrayCode(const uint32 g) const; 01282 }; 01283 } 01284 01285 #endif 01286