ltiParetoFront.h

00001 /*
00002  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
00003  * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
00004  *
00005  * This file is part of the LTI-Computer Vision Library (LTI-Lib)
00006  *
00007  * The LTI-Lib is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License (LGPL)
00009  * as published by the Free Software Foundation; either version 2.1 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * The LTI-Lib is distributed in the hope that it will be
00013  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00014  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with the LTI-Lib; see the file LICENSE.  If
00019  * not, write to the Free Software Foundation, Inc., 59 Temple Place -
00020  * Suite 330, Boston, MA 02111-1307, USA.
00021  */
00022 
00023 
00024 /*--------------------------------------------------------------------
00025  * project ....: LTI-Lib: Image Processing and Computer Vision Library
00026  * file .......: ltiParetoFront.h
00027  * authors ....: Pablo Alvarado
00028  * organization: LTI, RWTH Aachen
00029  * creation ...: 25.11.2003
00030  * revisions ..: $Id: ltiParetoFront.h,v 1.6 2006/03/10 18:52:05 alvarado Exp $
00031  */
00032 
00033 #ifndef _LTI_PARETO_FRONT_H_
00034 #define _LTI_PARETO_FRONT_H_
00035 
00036 #include <list>
00037 #include <vector>
00038 #include <fstream>
00039 #include "ltiVector.h"
00040 #include "ltiMatrix.h"
00041 #include "ltiContinuousRandDist.h"
00042 #include "ltiFunctor.h"
00043 #include "ltiProgressInfo.h"
00044 #include "ltiLispStreamHandler.h"
00045 
00046 namespace lti {
00047   /**
00048    * Pareto Front computation with PESA
00049    *
00050    * The Pareto Front functor provides an evaluation mechanism for algorithms,
00051    * in which the qualitiy of the algorithm cannot be restricted to one single
00052    * scalar measure.  It is adecuate to compare different parameterizations for
00053    * one or several algorithms, which are supposed to solve the same task.
00054    *
00055    * This class generates a "front" in a multidimensional fitness
00056    * space, which represents a trade-off between several fitness
00057    * values, that each derived class explicitelly implements.
00058    *
00059    * The algorithm used here to generate the front is called PESA (Pareto
00060    * Envelope-based Selection Algorithm), and it is described in:
00061    *
00062    * David. W. Corne, Joshua D. Knowles and Martin J. Oates
00063    * The Pareto Envelope-based Selection Algorithm for Multiobjective 
00064    * Optimization. In: Proceedings of the International Conference on Parallel
00065    * Problem Solving from Nature (PPSN VI). (2000) 839-848
00066    *
00067    * A good introduction to the application of this problem for the 
00068    * evaluation of image processing algorithms, specifically to the evaluation
00069    * of segmentation is given in:
00070    *
00071    * Mark Everingham, Henk Muller and Barry Thomas, Evaluating Image
00072    * Segmentation Algorithms using the Pareto Front. In Proceedings of
00073    * the 7th European Conference on Computer Vision (ECCV2002), Part
00074    * IV (LNCS 2353), pages 34-48. Springer, June 2002.
00075    *
00076    * http://www.cs.bris.ac.uk/Tools/Reports/Abstracts/2002-everingham-1.html
00077    *
00078    * This is the parent class for all evaluation algorithms using PESA.  Its
00079    * kern administrates the evolutionary part of the algorithm and the derived
00080    * classes provide the adaptions necessary for each application in
00081    * particular, which include the conversion between functor parameters and a
00082    * binary chain chromosome representation, the computation of the fitness
00083    * measures, etc.
00084    *
00085    * The current class also provides some simple tools that simplify the
00086    * construction of evaluation functors.
00087    *
00088    * There is a deviation from the original paper which is more suitable for
00089    * the evaluation of algorithms used here.  The original algorithms
00090    * separates the fitness space into regular hyperboxes and computes the
00091    * density of individuals in each hyperbox to decide which elements will be
00092    * used for crossover or mutation (those boxes with the smallest density)
00093    * and which elements need to be removed (those with the highest density).
00094    * The computation of the density is in the present functor much more
00095    * computationally expensive, but its computation is nevertheless negligible
00096    * in comparison to the computation of the fitness measures for an
00097    * algorithm:
00098    *
00099    * The current algorithm keeps track of the bounding box of the fitness
00100    * space for which individuals have been created.  This bounding box is used
00101    * to determine the size of a Gaussian kernel, which will be used on each
00102    * individual to compute the influence of all other individuals in the
00103    * front.  This way, the selection is not strongly bounded to the selection
00104    * of a partitioning of the fitness space.
00105    *
00106    * For an example of an evaluation class see lti::locationSearchEvaluation.
00107    *
00108    * \section Progress information
00109    *
00110    * To monitor the progress of the evolutionary process this functor makes use
00111    * of classes derived from lti::progressInfo, where the detail level of
00112    * the substeps can be used to show the information required.
00113    * - Detail Level 0: Only the steps are shown, with the front size and
00114    *                   the numbers of new individuals inserted.
00115    * - Detail Level 1: A line for each individual in the internal
00116    *                   population indicating the start of the evaluation is
00117    *                   shown
00118    * - Detail Level 2: For each individual, the fitness vector computed
00119    *                   is also displayed.
00120    * - Detail Level 3: At the end of each evaluation cicle (all internal
00121    *                   population already evaluated) show 
00122    *                   - New non-dominated child (NNDC) 
00123    *                   - Recently dead individual (RDIn)
00124    *                   - Individual removed by overpopulation (HDRI)
00125    */
00126   class paretoFront : public functor {
00127   public:
00128     /**
00129      * The parameters for the class paretoFront.
00130      *
00131      * These are the general parameters for the Pareto Front computation. 
00132      * For each special evaluation class, more attributes are usually added.
00133      */
00134     class parameters : public functor::parameters {
00135     public:
00136       /**
00137        * Default constructor
00138        */
00139       parameters();
00140 
00141       /**
00142        * Copy constructor
00143        * @param other the parameters object to be copied
00144        */
00145       parameters(const parameters& other);
00146 
00147       /**
00148        * Destructor
00149        */
00150       ~parameters();
00151 
00152       /**
00153        * Returns name of this type
00154        */
00155       const char* getTypeName() const;
00156 
00157       /**
00158        * Copy the contents of a parameters object
00159        * @param other the parameters object to be copied
00160        * @return a reference to this parameters object
00161        */
00162       parameters& copy(const parameters& other);
00163 
00164       /**
00165        * Copy the contents of a parameters object
00166        * @param other the parameters object to be copied
00167        * @return a reference to this parameters object
00168        */
00169       parameters& operator=(const parameters& other);
00170 
00171 
00172       /**
00173        * Returns a pointer to a clone of the parameters
00174        */
00175       virtual functor::parameters* clone() const;
00176 
00177       /**
00178        * Write the parameters in the given ioHandler
00179        * @param handler the ioHandler to be used
00180        * @param complete if true (the default) the enclosing begin/end will
00181        *        be also written, otherwise only the data block will be written.
00182        * @return true if write was successful
00183        */
00184       virtual bool write(ioHandler& handler,const bool& complete=true) const;
00185 
00186       /**
00187        * Read the parameters from the given ioHandler
00188        * @param handler the ioHandler to be used
00189        * @param complete if true (the default) the enclosing begin/end will
00190        *        be also written, otherwise only the data block will be written.
00191        * @return true if write was successful
00192        */
00193       virtual bool read(ioHandler& handler,const bool& complete=true);
00194 
00195 #     ifdef _LTI_MSC_6
00196       /**
00197        * This function is required by MSVC only, as a workaround for a
00198        * very awful bug, which exists since MSVC V.4.0, and still by
00199        * V.6.0 with all bugfixes (so called "service packs") remains
00200        * there...  This method is also public due to another bug, so please
00201        * NEVER EVER call this method directly: use read() instead
00202        */
00203       bool readMS(ioHandler& handler,const bool& complete=true);
00204 
00205       /**
00206        * This function is required by MSVC only, as a workaround for a
00207        * very awful bug, which exists since MSVC V.4.0, and still by
00208        * V.6.0 with all bugfixes (so called "service packs") remains
00209        * there...  This method is also public due to another bug, so please
00210        * NEVER EVER call this method directly: use write() instead
00211        */
00212       bool writeMS(ioHandler& handler,const bool& complete=true) const;
00213 #     endif
00214 
00215       // ------------------------------------------------
00216       // the parameters
00217       // ------------------------------------------------
00218 
00219       /**
00220        * Crossover probability.
00221        *
00222        * The PESA algorithms does a uniform crossover with this
00223        * probability Pc.  This means, with probability Pc a crossover
00224        * between two parents will be done, otherwise only mutation
00225        * will be done.
00226        *
00227        * Default value: 0.7
00228        */
00229       double crossoverProbability;
00230 
00231       /**
00232        * Initial Bit-flip mutation probability.  
00233        *
00234        * This value is usually set to 1/L, where L is the size of a 
00235        * chromosome, i.e. the bit-length size given to the binary parameter
00236        * representation. (see paretoFront::phenotypeToChromosome).
00237        *
00238        * If negative, the value used will be |mutationRate|/L. 
00239        * If positive, the value will be used "as is".
00240        *
00241        * Note that if positive, it only makes sense to have values between 0
00242        * and 1.
00243        *
00244        * This value has to be greater or equal finalMutationRate.
00245        *
00246        * Default value: -1 (i.e. 1/L will be used)
00247        */
00248       double initialMutationRate;
00249 
00250       /**
00251        * Final Bit-flip mutation probability.
00252        *
00253        * This value is usually set to 1/L, where L is the size of a 
00254        * chromosome, i.e. the bit-length size given to the binary parameter
00255        * representation. (see paretoFront::phenotypeToChromosome).
00256        *
00257        * If negative, the value used will be |mutationRate|/L. 
00258        * If positive, the value will be used "as is".
00259        *
00260        * Note that if positive, it only makes sense to have values between 0
00261        * and 1.
00262        *
00263        * This value has to be smaller or equal initialMutationRate.
00264        *
00265        * Default value: -1 (i.e. 1/L will be used)
00266        */
00267       double finalMutationRate;
00268 
00269       /**
00270        * Mutation Rate Decay Value
00271        *
00272        * It is possible to begin the evolution with a higher mutation rate
00273        * than in a "stable" evolution.  This supports a more random search a
00274        * the beginning, where nothing really good has been found.  After a few
00275        * steps can be however desirable to slowly reduce the mutation rate
00276        * into a more normal value.
00277        *
00278        * The ecuation used for the real mutation rate is:
00279        * (initialMutationRate-finalMutationRate)*exp(-i/d) with "i" the
00280        * iteration number and "d" this decay rate value.
00281        *
00282        * The smaller this value, the faster the mutation rate converges to 
00283        * its final value.
00284        *
00285        * This value must be strictly positive (never zero).  If you want a
00286        * "traditional" PESA, just set the initial and final mutation rates
00287        * with the same value.
00288        *
00289        * Default value: 33.38 (i.e. after 100 steps only 5\% of the
00290        *                       (final-initial) interval remains)
00291        */
00292       double mutationDecayRate;
00293 
00294       /**
00295        * Size of elements that constitute the Pareto Front.
00296        *
00297        * Default value: 100
00298        */
00299       int externalPopulationSize;
00300 
00301       /**
00302        * Internal population size
00303        *
00304        * Size of elements produced by each iteration through cross
00305        * over or mutation as candidates for the front.
00306        *
00307        * Default value: 10
00308        */
00309       int internalPopulationSize;
00310 
00311       /**
00312        * Dimensionality of the space analyzed by the Pareto Front.
00313        *
00314        * Note that this is unrelated with the parameter-space, which usually
00315        * has many more dimensions that the fitness space.
00316        *
00317        * Usual values are 2 or 3, since more dimensions are very difficult to
00318        * visualize.
00319        *
00320        * Default value: 2
00321        */
00322       int fitnessSpaceDimensionality;
00323 
00324       /**
00325        * Number of iterations.
00326        *
00327        * The process of generating an internal population and then assign the
00328        * best candidates to the Pareto Front is repeated a number of times
00329        * specified by this parameter.
00330        *
00331        * Note that the total number of evaluations for the algorithms will
00332        * be approximatelly this factor times internalPopulationSize.  If
00333        * you really want at least externalPopulationSize elements in the
00334        * Pareto front, you need to provide enough iterations to allow that,
00335        * which should be considerably greater than 
00336        * externalPopulationSize/internalPopulationSize, since not all
00337        * generated members are added to the pareto front.
00338        *
00339        * Default value: 1000
00340        */
00341       int numOfIterations;
00342 
00343       /**
00344        * Log all evaluated individuals.
00345        *
00346        * Sometimes, for documentation or debug purposes, you will want to know
00347        * all created individuals, even the ones not belonging to the pareto 
00348        * front.  Since they are usually not required, and they demand some
00349        * resources, it is left to you if you want to keep track of them or not.
00350        * 
00351        * Set this parameter to true, if you want to store all generated and
00352        * evaluated individuals, of false, if you want to save the space and
00353        * time required to remember them.
00354        *
00355        * Default value: false
00356        */
00357       bool logAllEvaluations;
00358 
00359       /**
00360        * Fitness space partitioning.
00361        *
00362        * The choice which individual(s) in the Pareto front should be chosen
00363        * for crossover or mutation is taken on a fitness-space density measure.
00364        * Elements will be removed from the dense locations, since there are
00365        * enough prototypes for those places, and for the generation of new
00366        * ones candidates are taken from the low-density regions.
00367        *
00368        * The bounding box for the fitness space will be computed
00369        * automatically as new elements are generated.
00370        * To determine the density at each location, a Gaussian
00371        * kernel will be used.  Its covariance matrix is assumed diagonal, where
00372        * each dimension will have as std. deviation a sixth of the length
00373        * obtaind dividing the interval with the given factor.
00374        *
00375        * In the original PESA paper a fixed grid was used, but this has 
00376        * limitations in the reachable precision.
00377        *
00378        * Default value: 32
00379        */
00380       int fitnessSpacePartition;
00381 
00382       /**
00383        * Sort result in scanning order.
00384        *
00385        * If true, the individuals of the front will be sorted in ascending
00386        * order of their multidimensional fitness.
00387        *
00388        * In principle, this sorting has no semantical effects about the
00389        * overall fitness of an individual, i.e. an individual later in the
00390        * list is not necessarily better than another one with a smaller index.
00391        * This sorting is more oriented towards drawing tasks for the Pareto
00392        * front.
00393        * 
00394        * According to the fitness scanning ordering, and individual A greater
00395        * than an individual B if
00396        *
00397        * \code
00398        *  (A[n-1] > B[n-1]) or 
00399        * ((A[n-1] == A[n-1]) and ( (A[n-2] > A[n-2]) or
00400        *                          ((A[n-2] == A[n-2]) and (A[n-3] > A[n-3]))
00401        *                           ... ))
00402        * \endcode
00403        *
00404        * This is the same ordering employed for lti::tpoint<T>
00405        *
00406        * Sorting will use the STL methods to sort efficiently the result. 
00407        * However, it is optional in case you don't care how the individuals
00408        * are sorted.
00409        *
00410        * Default value: false
00411        */
00412       bool sortResult;
00413 
00414       /**
00415        * @name Log options
00416        */
00417       //@{
00418       /**
00419        * Activate log.
00420        *
00421        * If true, every new individual that is inserted to the front will be
00422        * logged in the given file.  Later on, you can use a special apply to
00423        * continue the analysis of a broken progress.
00424        *
00425        * Default value: false
00426        */
00427       bool logFront;
00428 
00429       /**
00430        * Log Filename.
00431        *
00432        * Filename used for the log of patterns.  The data will be written in
00433        * an ASCII format using a lti::lispStreamHandler.  It will contain for
00434        * each individual the chromosome binary representation and the computed
00435        * fitness.  At the beginning it will save all parameters necessary to
00436        * bring this functor to a compatible state.
00437        *
00438        * Default value: "pareto.log"
00439        */
00440       std::string logFilename;
00441       //@}
00442     };
00443 
00444     /**
00445      * Default constructor
00446      */
00447     paretoFront();
00448 
00449     /**
00450      * Construct a functor using the given parameters
00451      */
00452     paretoFront(const parameters& par);
00453 
00454     /**
00455      * Copy constructor
00456      * @param other the object to be copied
00457      */
00458     paretoFront(const paretoFront& other);
00459 
00460     /**
00461      * Destructor
00462      */
00463     virtual ~paretoFront();
00464 
00465     /**
00466      * Returns the name of this type ("paretoFront")
00467      */
00468     virtual const char* getTypeName() const;
00469 
00470     /**
00471      * Compute the Pareto Front
00472      * 
00473      * Since the evaluation function can get as input any kind of information,
00474      * it is left to the derived classes to provide alternative way of doing
00475      * this: other apply methods or use() methods to indicate the set of data
00476      * to be used.  
00477      * 
00478      * The Pareto Front will be represent by a matrix of size \e m x
00479      * \e n with \e m parameters::externalPopulationSize and \e n
00480      * fitnessSpaceDimensionality.
00481      *
00482      * @param front matrix containing for each row the multidimensional 
00483      *             fitness value.
00484      *
00485      * @return true if apply successful or false otherwise.
00486      */
00487     virtual bool apply(matrix<double>& front);
00488 
00489     /**
00490      * Compute the Pareto Front and the corresponding parameter objects
00491      * for each non-dominated point.
00492      * 
00493      * Since the evaluation function can get as input any kind of information,
00494      * it is left to the derived classes to provide alternative ways of doing
00495      * this: other apply methods or use() methods to indicate the data set
00496      * to be used in the algorithm evaluation.
00497      * 
00498      * The Pareto Front will be represented by a matrix of size \e m x
00499      * \e n with \e m parameters::externalPopulationSize and \e n
00500      * fitnessSpaceDimensionality.
00501      *
00502      * For each row in this matrix (an individual), the corresponding
00503      * parameter setting (a phenotype) is returned.  Since different
00504      * algorithms usually use different parameters, this can only be
00505      * done with dynamically allocated objects.  You need to take care of
00506      * them, but this class provides an utility trash() method, which deletes
00507      * them for you if you want to. 
00508      * 
00509      * @param front matrix containing for each row the multidimensional 
00510      *             fitness value.
00511      * @param phenotypes the parameters for each fitness value.
00512      *
00513      * @return true if apply successful or false otherwise.
00514      */
00515     virtual bool apply(matrix<double>& front,
00516                        std::vector<functor::parameters*>& phenotypes);
00517 
00518 
00519     /**
00520      * Compute the Pareto Front and the corresponding parameter objects
00521      * for each non-dominated point.
00522      *
00523      * This is a special method which reassumes a broken analysis, gaining
00524      * the lost information from a previously generated file.  That file
00525      * can be generated activating in the parameters the option logFront and
00526      * indicating there the name for the file.
00527      *
00528      * This method will work only if the parameters logFront and logFilename
00529      * are properly set (logFront in true and logFilename with the proper
00530      * filename).  It is not named "apply" for the simple reason that the
00531      * parameters will be changed to those stored in the log-file itself.
00532      *
00533      * Since the evaluation function can get as input any kind of information,
00534      * it is left to the derived classes to provide alternative ways of doing
00535      * this: other apply()-methods or use()-methods to indicate the data set
00536      * to be used in the algorithm evaluation.
00537      * 
00538      * The Pareto Front will be represented by a matrix of size \e m x
00539      * \e n with \e m parameters::externalPopulationSize and \e n
00540      * fitnessSpaceDimensionality.
00541      *
00542      * For each row in this matrix (an individual), the corresponding
00543      * parameter setting (a phenotype) is returned.  Since different
00544      * algorithms usually use different parameters, this can only be done with
00545      * dynamically allocated objects.  You need to take care of them, but this
00546      * class provides an handy trash() method, which deletes them for you if
00547      * you want to.  If you don't remove them properly you will have a huge
00548      * memory-leak!  Therefore, we recomend the use of trash().
00549      * 
00550      * @param front matrix containing for each row the multidimensional 
00551      *             fitness value.
00552      * @param phenotypes the parameters for each fitness value.
00553      *
00554      * @return true if apply successful or false otherwise.
00555      */
00556     virtual bool resume(matrix<double>& front,
00557                         std::vector<functor::parameters*>& phenotypes);
00558 
00559 
00560     /**
00561      * @name Additional Information
00562      */
00563     //@{
00564     /**
00565      * Fitness space bounding box.
00566      *
00567      * After determining the Pareto Front, you usually want to know the
00568      * bounding box of the fitness space that was analyzed.  This for 
00569      * visualization purposes among others.
00570      *
00571      * After the apply() methods you can get the bounding box calling this
00572      * one.
00573      *
00574      * This will always be a matrix 2 x parameters::fitnessSpaceDimensionality,
00575      * where the first row contains the minima for each dimension and the
00576      * second row the maxima.
00577      */
00578     void getAnalyzedBox(matrix<double>& bbox) const;
00579 
00580     /**
00581      * Dominated individuals.
00582      *
00583      * This method returns all points in the fitness space that were
00584      * analyzed while constructing the feature space.  For this method
00585      * to return something meaningful, you need to set in the
00586      * parameters the attribute logAllEvaluations to \c true.
00587      *
00588      * The resulting matrix will have the 
00589      * size \e n x parameters::fitnessSpaceDimensionality, where \e n is
00590      * the number of dominated individuals analyzed in the process.
00591      */
00592     void getDominatedIndividuals(matrix<double>& dindiv) const;
00593 
00594     /**
00595      * Get data from log
00596      *
00597      * If a log file is generated, usually you cannot read the used 
00598      * parameterization.  With this method you will get from the log file the
00599      * list of parameters and their corresponding fitness values, as if you
00600      * had used the corresponding apply method.
00601      *
00602      * The parameters of the current functor will change without invalidating
00603      * the reference.  Therefore this method is not constant.
00604      */
00605     bool getDataFromLog(const std::string& logFile,
00606                         matrix<double>& front,
00607                         std::vector<functor::parameters*>& phenotypes);
00608 
00609     //@}
00610 
00611     /**
00612      * Copy data of "other" functor.
00613      * @param other the functor to be copied
00614      * @return a reference to this functor object
00615      */
00616     paretoFront& copy(const paretoFront& other);
00617 
00618     /**
00619      * Alias for copy member
00620      * @param other the functor to be copied
00621      * @return a reference to this functor object
00622      */
00623     paretoFront& operator=(const paretoFront& other);
00624 
00625     /**
00626      * Returns a pointer to a clone of this functor.
00627      */
00628     virtual functor* clone() const = 0;
00629 
00630     /**
00631      * Returns used parameters
00632      */
00633     const parameters& getParameters() const;
00634 
00635     /**
00636      * Type used to represent chromosomes
00637      */
00638     typedef std::vector<bool> chromosome;
00639 
00640     /**
00641      * Delete all parameter objects in the given vector
00642      */
00643     bool trash(std::vector<functor::parameters*> phenotypes) const;
00644 
00645 
00646     /**
00647      * @name Progress Info
00648      *
00649      * Methods used to plug-in a progress visualization object.
00650      */
00651     //@{
00652     /**
00653      * set a progress %object
00654      *
00655      * A clone of the given %object will be generated.
00656      */
00657     void setProgressObject(const progressInfo& progBox);
00658 
00659     /**
00660      * remove the active progress %object
00661      */
00662     void removeProgressObject();
00663 
00664     /**
00665      * return true if a valid progressInfo %object has already been set
00666      */
00667     bool validProgressObject() const;
00668 
00669     /**
00670      * Return true if a valid progressInfo %object has already been set and
00671      * its detail level is greater or equal the given value.
00672      */
00673     bool validProgressObject(const int detailLevel) const;
00674 
00675     /**
00676      * get progress %object
00677      */
00678     progressInfo& getProgressObject();
00679 
00680     /**
00681      * get progress %object
00682      */
00683     const progressInfo& getProgressObject() const;
00684 
00685     //@}
00686 
00687 
00688 
00689     /**
00690      * @name Public methods to be reimplemented
00691      *
00692      * Following methods need to be reimplemented to evaluate specific
00693      * algorithms.
00694      */
00695     //@{
00696 
00697     /**
00698      * Convert a binary-chain representation of a chromosome to a valid
00699      * parameter object.
00700      *
00701      * There are some tools to convert standard types into some binary chains,
00702      * which can be used by all derived classes:
00703      * - binToInt()
00704      * - binToUInt()
00705      * - binToDouble()
00706      */
00707     virtual bool chromosomeToPhenotype(const chromosome& genotype,
00708                                        functor::parameters& phenotype) const=0;
00709 
00710     /**
00711      * Return a fresh allocated parameters for the evaluated functor, which is
00712      * equivalent to the given genotype.
00713      *
00714      * There are some tools to convert binary chains into standard types:
00715      * - intToBin()
00716      * - uintToBin()
00717      * - doubleToBin()
00718      */
00719     virtual functor::parameters* 
00720     chromosomeToPhenotype(const chromosome& genotype) const = 0;
00721 
00722     /**
00723      * Convert a valid parameters object (phenotype) into binary-chain
00724      * representation of a chromosome.
00725      */
00726     virtual bool phenotypeToChromosome(const functor::parameters& phenotype,
00727                                        chromosome& genotype) const=0;
00728 
00729     /**
00730      * Return the length in bits for a chromosome.
00731      *
00732      * This method needs to be reimplemented, in order to get some 
00733      * default implementations to work.
00734      */
00735     virtual int getChromosomeSize() const = 0;
00736 
00737     /**
00738      * Evaluate Chromosome
00739      *
00740      * This method is one of the most important ones for the pareto evaluation.
00741      * Its task is to produce a multidimensional fitness measure for a given
00742      * chromosome.
00743      *
00744      * It returns true if the evaluation was successful, of false if the
00745      * phenotype represents some invalid parameterization.  It is highly 
00746      * recomended that the mutation and crossover methods are reimplemented to 
00747      * avoid invalid parameterizations.
00748      *
00749      * There are mainly two types of fitness measures that can be
00750      * analyzed with a functor of this kind: empirical goodness and
00751      * empirical discrepancy (Zhang).  The empirical goodness computes some
00752      * measure using exclusively the test data, without requiring any ground
00753      * truth.  The empirical discrepancy assumes the existency of ground truth
00754      * and provides as measure some distance between the result of an algorithm
00755      * and the ground truth.  Each class derived from paretoFront should
00756      * specify clearly which kind of fitness measures it provides.
00757      * 
00758      */
00759     virtual bool evaluateChromosome(const chromosome& individual,
00760                                     dvector& fitness) = 0; 
00761 
00762     //@}
00763 
00764     /**
00765      * @name Conversion Tools
00766      *
00767      * Converting between phenotypes and binary chains representing
00768      * chromosomes can become a tedious task.  These tools should facilitate
00769      * the process.
00770      */
00771     //@{
00772     /**
00773      * Convert binary chain into an integer representation.
00774      *
00775      * The lower indices in the chain represent the MSB of the number.
00776      * 
00777      * @param chain the binary chain representing a chromosome
00778      * @param startBit index of the bit, at which the integer begins
00779      * @param bitLength number of bits used for this integer (never greater
00780      *                  than 32).
00781      * @param result integer value read from the chain
00782      * @return the next valid index in the chain, chain.size() if it was
00783      *         completely read, or -1 if the request goes beyond the 
00784      *         size of the chain.
00785      */
00786     int binToInt(const chromosome& chain,
00787                  const int startBit,
00788                  const int bitLength,
00789                  int32& result) const;
00790 
00791     /**
00792      * Convert binary chain into an integer representation.
00793      *
00794      * The lower indices in the chain represent the MSB of the number.
00795      * 
00796      * If the given value range is higher than the range representable with
00797      * the given number of bits, only the lowLimit will be considered.  If
00798      * the read number has a higher range than the high-low one, then the 
00799      * modulo operation will be applied to keep the values in range.
00800      * 
00801      * @param chain the binary chain representing a chromosome
00802      * @param startBit index of the bit, at which the integer begins
00803      * @param bitLength number of bits used for this integer (never greater
00804      *                  than 32).
00805      * @param lowLimit lowest value (inclusive) that the stored variable
00806      *                 can take.
00807      * @param highLimit highest value (inclusive) that the stored variable
00808      *                  can take.
00809      * @param result integer value read from the chain
00810      * @return the next valid index in the chain, chain.size() if it was
00811      *         completelly read, or -1 if the request goes beyond the 
00812      *         size of the chain.
00813      */
00814     int binToInt(const chromosome& chain,
00815                  const int startBit,
00816                  const int bitLength,
00817                  const int lowLimit,
00818                  const int highLimit,
00819                  int32& result) const;
00820 
00821 
00822     /**
00823      * Convert binary chain into an unsigned integer representation.
00824      *
00825      * The lower indices in the chain represent the MSB of the number.
00826      * 
00827      * @param chain the binary chain representing a chromosome
00828      * @param startBit index of the bit, at which the integer begins
00829      * @param bitLength number of bits used for this integer (never greater
00830      *                  than 32).
00831      * @param result integer value read from the chain
00832      * @return the next valid index in the chain, chain.size() if it was
00833      *         completelly read, or -1 if the request goes beyond the 
00834      *         size of the chain.
00835      */
00836     int binToUInt(const chromosome& chain,
00837                   const int startBit,
00838                   const int bitLength,                  
00839                   uint32& result) const;
00840 
00841     /**
00842      * Convert binary chain into an double floating point representation.
00843      *
00844      * The binarization of floating point values for "genetic" manipulation
00845      * can easily result in non-sense values.  Therefore here a specified
00846      * value range is quantized in the desired number of bits between 0 and
00847      * 31.
00848      *     
00849      * @param chain the binary chain representing a chromosome
00850      * @param startBit index of the bit, at which the integer begins.
00851      * @param bitLength number of bits used for this double number
00852      *                  (never greater or equal than 32).
00853      * @param lowLimit lowest value (inclusive) that the stored variable
00854      *                 can take.
00855      * @param highLimit highest value (inclusive) that the stored variable
00856      *                  can take.
00857      * @param result double value read from the chain
00858      * @return the next valid index in the chain, chain.size() if it was
00859      *         completelly read, or -1 if the request goes beyond the 
00860      *         size of the chain.
00861      */
00862     int binToDouble(const chromosome& chain,
00863                     const int startBit,
00864                     const int bitLength,
00865                     const double& lowLimit,
00866                     const double& highLimit,
00867                     double& result) const;
00868 
00869     /**
00870      * Convert integer value in a binary change.
00871      *
00872      * @param value the value to be stored.
00873      * @param startBit starting index in the chain, where the representation
00874      *                 has to be stored.
00875      * @param bitLength number of bits used to represent the number.  If it
00876      *                  is not possible, then -1 will be returned.
00877      * @param chain binary change with the resulting representation.
00878      * @return the next valid index in the chain, where further data can
00879      *         be inserted.  If there is not enough space, -2 will be returned
00880      */
00881     int intToBin(const int value,
00882                  const int startBit,
00883                  const int bitLength,
00884                  chromosome& chain) const;
00885 
00886     /**
00887      * Convert integer value in a binary change.
00888      *
00889      * @param value the value to be stored.
00890      * @param startBit starting index in the chain, where the representation
00891      *                 has to be stored.
00892      * @param bitLength number of bits used to represent the number.  If it
00893      *                  is not possible, then -1 will be returned.
00894      * @param chain binary change with the resulting representation.
00895      * @param lowLimit lowest value (inclusive) that the stored variable
00896      *                 can take.
00897      * @param highLimit highest value (inclusive) that the stored variable
00898      *                  can take.
00899      * @return the next valid index in the chain, where further data can
00900      *         be inserted.  If there is not enough space, -2 will be returned
00901      */
00902     int intToBin(const int value,
00903                  const int startBit,
00904                  const int bitLength,
00905                  const int lowLimit,
00906                  const int highLimit,
00907                  chromosome& chain) const;
00908 
00909     /**
00910      * Convert integer value in a binary change.
00911      *
00912      * @param value the value to be stored.
00913      * @param startBit starting index in the chain, where the representation
00914      *                 has to be stored.
00915      * @param bitLength number of bits used to represent the number.  If it
00916      *                  is not possible, then -1 will be returned.
00917      * @param chain binary change with the resulting representation.
00918      * @return the next valid index in the chain, where further data can
00919      *         be inserted.  If there is not enough space, -2 will be returned
00920      */
00921     int uintToBin(const uint32 value,
00922                   const int startBit,
00923                   const int bitLength,
00924                   chromosome& chain) const;
00925 
00926     /**
00927      * Convert double value in a binary change.
00928      * 
00929      * @param value value to be quantized.
00930      * @param startBit starting index in the chain, where the representation 
00931      *                 has to be stored.
00932      * @param bitLength number of bits used to represent the number.  If it
00933      *                  is not possible, then -1 will be returned.
00934      * @param lowLimit lowest value (inclusive) that the stored variable
00935      *                 can take.
00936      * @param highLimit highest value (inclusive) that the stored variable
00937      *                  can take.
00938      * @param chain binary change with the resulting representation.
00939      */
00940     int doubleToBin(const double& value,
00941                     const int startBit,
00942                     const int bitLength,
00943                     const double& lowLimit,
00944                     const double& highLimit,
00945                     chromosome& chain) const;
00946 
00947     //@}
00948 
00949 
00950   protected:
00951     /**
00952      * Returns used parameters
00953      */
00954     parameters& getRWParameters();
00955 
00956     /**
00957      * Structure characterizing an individual
00958      */
00959     class individual {
00960     public:
00961       /**
00962        * Constructor
00963        */
00964       individual() : fitness(),genotype(),squeezeFactor(0.0) {}
00965 
00966       /**
00967        * An individual is here "smaller" than another one if
00968        * its squeezeFactor is smaller.  This is required to sort
00969        * the individuals after their squeeze factor, which simplifies
00970        * getting the smallest or greatest elements.
00971        */
00972       inline bool operator<(const individual& other) const {
00973         return (squeezeFactor < other.squeezeFactor);
00974       }
00975 
00976       /**
00977        * Fitness of the individual
00978        */
00979       dvector fitness;
00980 
00981       /**
00982        * Chromosome.
00983        *
00984        * You can get the phenotype anytime with cromosomeToPhenotype
00985        */
00986       chromosome genotype;
00987 
00988       /**
00989        * Factor computed to determine which candidates should be taken
00990        * for mutation or crossover.  Dense locations in the fitness space
00991        * get a high squeeze factor.
00992        */
00993       double squeezeFactor;
00994     };
00995 
00996     /**
00997      * Initialize the internal population.
00998      *
00999      * You usually don't want to reimplement this method, but
01000      * randomIndividual().  However, you can reimplemented it if you
01001      * need more that the standard generation of
01002      * parameters::internalPopulationSize random individuals.
01003      *
01004      * @return true if successful, false otherwise.
01005      */
01006     virtual bool initInternalPopulation(std::vector<individual>& data);
01007 
01008     /**
01009      * @name Methods to be reimplemented
01010      */
01011     //@{
01012 
01013     /**
01014      * Generate a random individual.
01015      *
01016      * You usually will need to reimplement this method to ensure that
01017      * the generated random individuals have a valid phenotype, i.e. that
01018      * the chromosome binary representation in "genotype" has an equivalent
01019      * parameter object for the class you are using.
01020      *
01021      * Return true if successful, false otherwise.
01022      */
01023     virtual bool randomIndividual(chromosome& genotype);
01024 
01025     /**
01026      * Mutate the given chromosome.
01027      *
01028      * This should be reimplemented to ensure that the mutation is a 
01029      * valid phenotype.
01030      *
01031      * The default implementation flips the bits with the probability
01032      * given in the parameters.
01033      */
01034     virtual bool mutate(const chromosome& parent,
01035                         chromosome& mutant);
01036 
01037     /**
01038      * Crossover between two chromosomes.
01039      * 
01040      * This should be reimplemented to ensure that the crossover produces a 
01041      * valid phenotype.
01042      *
01043      * The default implementation does a so called uniform crossover, in which
01044      * each pair of corresponding bits are exchanged with a probability of
01045      * 0.5, followed by the mutation indicated in the original PESA paper.
01046      */
01047     virtual bool crossover(const chromosome& parent1,
01048                            const chromosome& parent2,
01049                            chromosome& child);
01050 
01051     //@}
01052 
01053     /**
01054      * Get a random value between 0 and 1
01055      */
01056     inline double random() const {
01057       return crdist.draw();
01058     };
01059 
01060   private:
01061     /** 
01062      * @name PESA Algorithm
01063      */
01064     //@{
01065     /**
01066      * The PESA Algorithm
01067      *
01068      * Computes the Pareto front, which will be return as list of individuals
01069      * in PE.
01070      */
01071     bool pesa(std::vector<individual>& PE,const bool initFromLog=false);
01072 
01073     /**
01074      * Insert non-dominated member from PI to PE
01075      *
01076      * Return the number of elements of PI that were inserted in PE.
01077      */ 
01078     int insert(std::vector<individual>& PI,
01079                std::vector<individual>& PE);
01080 
01081     /**
01082      * Insert non-dominated member into PE
01083      */ 
01084     bool insert(individual& genotype,
01085                 std::vector<individual>& PE);
01086 
01087     /**
01088      * Returns a random individual in the given population, which has
01089      * been selected because it had a smaller squeeze factor in a binary
01090      * tournament.
01091      */
01092     int binaryTournament(const std::vector<individual>& PE) const;
01093     
01094     /**
01095      * Return true if a>b (a dominates b) after the definition used in the
01096      * Pareto literature: 
01097      *
01098      * a>b <=> for all i a[i]>=b[i] and it exists one i such that a[i]>b[i]
01099      * 
01100      * The arguments a and b represent here multidimensional fitness values.
01101      */
01102     bool dominate(const dvector& a,
01103                   const dvector& b) const;
01104     
01105     /**
01106      * Shadow for the parameter mutation rate, fixed to contain only
01107      * values between 0 and 1
01108      */
01109     double mutationRate;
01110     //@}
01111 
01112     /**
01113      * LUT-based method for computing g(x) = exp(-x^2).
01114      * The value given is x and not x^2
01115      *
01116      * Only values between 0 and 3 will produce something, all the rest
01117      * produce zero as output
01118      */
01119     static const double* expLUT;
01120     
01121     /**
01122      * Initialize expLUT
01123      */
01124     bool initExpLUT();
01125 
01126     /**
01127      * An efficient way to compute g(x)=exp(-x^2)
01128      */
01129     inline double exp2(const double& x) const;
01130 
01131     /**
01132      * Compute the fitness distance between the given two fitness points
01133      */
01134     inline double fitnessDistance(const dvector& a,
01135                                   const dvector& b) const;
01136     
01137     /**
01138      * Log all evaluation
01139      */
01140     bool logEvaluations;
01141 
01142     /**
01143      * All individuals not belonging to the pareto front are somehow dead!
01144      *
01145      * This will be used only if logEvaluations is true
01146      */
01147     std::list<individual> deadIndividuals;
01148 
01149     /**
01150      * Functor used everywhere to generate values between 0 and 1.
01151      */
01152     continuousRandomDistribution crdist;
01153 
01154     /**
01155      * Bounding box.
01156      *
01157      * The size of this matrix will be:
01158      * 2 x parameters::fitnessSpaceDimensionality
01159      */
01160     dmatrix bbox;
01161 
01162     /**
01163      * Sigmas
01164      *
01165      * The fitness space grid size will be used to compute the std. deviation
01166      * per each axis.
01167      */
01168     dvector sigmas;
01169 
01170     /**
01171      * Initialize log.
01172      *
01173      * This method reinitializes the log.  It writes the functor parameters
01174      * and internal configuration.
01175      */
01176     bool initLog();
01177 
01178     /**
01179      * Make an entry for the individual in the log file (if desired).
01180      *
01181      * @return true if the logging is activated, or false if no log was
01182      *         desired.
01183      */
01184     bool logEntry(const individual& ind,const bool markDead=false);
01185 
01186     /**
01187      * Initialize the bounding box
01188      */
01189     void initBoundingBox(dmatrix& boundingBox) const;
01190 
01191     /**
01192      * Update bounding box considering the given fitness space point.
01193      *
01194      * @return true if there was a change in the bounding box, false if
01195      * given point was already within the bounding box.
01196      */
01197     bool updateBoundingBox(const dvector& pnt,
01198                                  dmatrix& boundingBox) const;
01199 
01200     /**
01201      * Update fitness space subdivision.
01202      *
01203      * This initializes the sigmas based on the bounding box contents.
01204      */
01205     void updateFitnessSpaceSubdivision();
01206 
01207     /**
01208      * Update density factors.
01209      *
01210      * Recompute all squeeze factors for the individuals in the 
01211      * external population
01212      */
01213     void updateDensityFactors(std::vector<individual>& PE);
01214 
01215     /**
01216      * Convert a chromosome into a string, to be saved in the log file
01217      */
01218     void chromosomeToString(const chromosome& genotype,std::string& str) const;
01219 
01220     /**
01221      * Convert a string into a chromosome into a string, to be loaded from
01222      * the log file
01223      */
01224     void stringToChromosome(const std::string& str,
01225                             chromosome& genotype) const;
01226 
01227     /**
01228      * current progress %object
01229      */
01230     progressInfo* progressBox;
01231 
01232     /**
01233      * Class used to compare individuals in "scanning order"
01234      */
01235     struct scanLess;
01236 
01237     /**
01238      * Output stream used to write the log
01239      */
01240     std::ofstream* logOut;
01241 
01242     /**
01243      * Lisp-Stream-Handler used for log output
01244      */
01245     lispStreamHandler olsh;
01246 
01247     /**
01248      * Copied from the parameters
01249      */
01250     bool logFront;
01251 
01252     /**
01253      * Get data from log
01254      *
01255      * This method reads the log file and create the corresponding data.
01256      * Since usually the logs are broken (the user breaks the execution of
01257      * a long computing process), this method needs to cope with broken
01258      * files.
01259      *
01260      * @param logFile name of the file with the log
01261      * @param params parameters as written in the log file
01262      * @param data all data found in the log file.
01263      * @param boundingBox bounding box of the data
01264      * @return true if successful.
01265      */
01266     bool getDataFromLog(const std::string& logFile,
01267                         parameters& params,
01268                         std::vector<individual>& data,
01269                         dmatrix& boundingBox) const;
01270 
01271 
01272   private:
01273     /**
01274      * Return the gray code of the given number
01275      */
01276     inline uint32 grayCode(const uint32 i) const;
01277 
01278     /**
01279      * Return the integer value corresponding to the given gray code
01280      */
01281     inline uint32 iGrayCode(const uint32 g) const;
01282   };
01283 }
01284 
01285 #endif
01286