LTI-Lib latest version v1.9 - last update 10 Apr 2010

ltiEarthMoversDistance.h

00001 /*
00002  * Copyright (C) 2003, 2004, 2005, 2006
00003  * Yossi Rubner, Computer Science Department, Stanford University
00004  *
00005  * This file is part of the LTI-Computer Vision Library (LTI-Lib)
00006  *
00007  * The LTI-Lib is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public License (LGPL)
00009  * as published by the Free Software Foundation; either version 2.1 of
00010  * the License, or (at your option) any later version.
00011  *
00012  * The LTI-Lib is distributed in the hope that it will be
00013  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
00014  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with the LTI-Lib; see the file LICENSE.  If
00019  * not, write to the Free Software Foundation, Inc., 59 Temple Place -
00020  * Suite 330, Boston, MA 02111-1307, USA.
00021  */
00022 
00023 
00024 /*--------------------------------------------------------------------
00025  * project ....: LTI-Lib: Image Processing and Computer Vision Library
00026  * file .......: ltiEarthMoversDistance.h
00027  * authors ....: Yossi Rubner; Peter Doerfler
00028  * organization: CS, Stanford University; LTI, RWTH Aachen
00029  * creation ...: 29.10.2003
00030  * revisions ..: $Id: ltiEarthMoversDistance.h,v 1.9 2006/02/08 12:18:18 ltilib Exp $
00031  */
00032 
00033 #ifndef _LTI_EARTH_MOVERS_DISTANCE_H_
00034 #define _LTI_EARTH_MOVERS_DISTANCE_H_
00035 
00036 #include "ltiVector.h"
00037 #include "ltiMatrix.h"
00038 #include "ltiGenericMatrix.h"
00039 #include "ltiL2Distance.h"
00040 #include "ltiDistanceFunctor.h"
00041 
00042 #undef _LTI_DEBUG
00043 //#define _LTI_DEBUG 2
00044 #include "ltiDebug.h"
00045 
00046 namespace lti {
00047   /**
00048    * The Earth Mover's Distance (EMD) is a robust distance measure
00049    * between two distributions. Each distribution has a number of
00050    * clusters n,m with corresponding weights. There is a ground
00051    * distance between two clusters which specifies the cost of moving
00052    * some weight from one cluster to the other. The EMD measures the
00053    * relative total cost of transforming one distribution into the
00054    * other by moving the weights around. This is done by solving the
00055    * transportation problem.
00056    *
00057    * Template type C specifies the type of the Cluster centers,
00058    * e.g. dvector or rgbPixel. Type W is the type of vector<W> which
00059    * is used for the weights of the clusters and template type D
00060    * specifies the ground distance to be used for the clusters. It
00061    * must be a Distantor<C> (see e.g. l2Distantor<T>.  Note that
00062    * instead of this calculation the cost matrix can be given in
00063    * parameters::costMatrix.
00064    *
00065    * There are four types of applys:
00066    * -# emd between two vector<W>
00067    * -# emd between two matrix<W>
00068    * -# emd between two matrix<C::value_type>, vector<W>
00069    * -# emd between two std::vector<C>, vector<W>
00070    *
00071    * The first two options are formulated in this way to achieve
00072    * compliance with the definition of distanceFunctor<T>. The cluster
00073    * centers used for the calculation of the cost matrix are taken to
00074    * be the indices of the vector/matrix. If this is not desired a
00075    * cost matrix must be given in the parameters since the Distantor D
00076    * is not used (due to possible type conflicts). Apply-types 3 and 4
00077    * use cluster centers and a weights vector. The cluster centers are
00078    * defined in two ways for convenience. Type 3 is more practical for
00079    * cluster centers that are C= vector<T> -like. Then the matrix<T>
00080    * contains cluster centers in the rows. For C=rgbPixel type 4 is
00081    * more convenient than first constructing a matrix<ubyte> for type
00082    * 3.
00083    *
00084    * Depending on the apply method used the flow between the two
00085    * distributions is also returned. The first distribution is indexed
00086    * in the rows the second in columns. Assuming to clusters for the
00087    * first and three clusters for the second distribution a flow
00088    * matrix could look like this:
00089    * \code
00090    * 0.3   0.1     0.4
00091    * 0     0       0.2
00092    * \endcode
00093    * From the first cluster of distribution1 there is a flow of 0.3 to
00094    * the first cluster, 0.1 to the second and 0.4 to the third cluster
00095    * of distribution2. From the second cluster of distribution1 there
00096    * is a flow of 0.2 to the third cluster of distribution2. From the
00097    * flow %matrix we can see that the weight vectors were [0.8 0.2]
00098    * and [0.3 0.1 0.6] for distributions one and two,
00099    * respectively. The total cost is calculated by summing up the
00100    * products between each flow and its cost.
00101    *
00102    * Note that neither the number of clusters nor the sum of weights
00103    * for the to distributions have to be equal. However, this gives
00104    * probably unwanted results for distances between vectors and matrices.
00105    *
00106    * Unlike other distanceFunctor<T> classes the EMD does not define a
00107    * norm on a distribution. All corresponding apply methods throw an
00108    * InvalidMethod exception.
00109    *
00110    * \b Note: This functor is \b not multi-thread safe!
00111    *
00112    * The core of this functor was implemented by Yossi Rubner,
00113    * Computer Science Department, Stanford University,
00114    * rubner@cs.stanford.edu,
00115    * http://vision.stanford.edu/~rubner. 
00116    *
00117    * Encapsulation and template support originally designed by Peter
00118    * Doerfler, LTI, RWTH-Aachnen.
00119    */
00120   template< class W, class C=vector<W>, class D=l2Distantor<C> >
00121   class earthMoversDistance : public distanceFunctor<W> {
00122   public:
00123     /**
00124      * the parameters for the class earthMoversDistance
00125      */
00126     class parameters : public distanceFunctor<W>::parameters {
00127     public:
00128       /**
00129        * default constructor
00130        */
00131       parameters() : distanceFunctor<W>::parameters() {
00132         
00133         costMatrix = matrix<W>(0,0);
00134         epsilon = 1E-6;
00135         maxSteps = 500;
00136       };
00137 
00138       /**
00139        * copy constructor
00140        * @param other the parameters object to be copied
00141        */
00142       parameters(const parameters& other) : distanceFunctor<W>::parameters() {
00143         copy(other);
00144       }
00145       
00146       /**
00147        * destructor
00148        */
00149       ~parameters() {
00150       };
00151 
00152       /**
00153        * returns name of this type
00154        */
00155       const char* getTypeName() const {
00156         return "earthMoversDistance::parameters";
00157       };
00158 
00159       /**
00160        * copy the contents of a parameters object
00161        * @param other the parameters object to be copied
00162        * @return a reference to this parameters object
00163        */
00164       parameters& copy(const parameters& other) {
00165 #     ifndef _LTI_MSC_6
00166         // MS Visual C++ 6 is not able to compile this...
00167         distanceFunctor<W>::parameters::copy(other);
00168 #     else
00169         // ...so we have to use this workaround.
00170         // Conditional on that, copy may not be virtual.
00171         distanceFunctor<W>::parameters& (distanceFunctor<W>::parameters::* p_copy)
00172           (const distanceFunctor<W>::parameters&) =
00173           distanceFunctor<W>::parameters::copy;
00174         (this->*p_copy)(other);
00175 #     endif
00176 
00177         
00178         costMatrix.copy(other.costMatrix);
00179         epsilon = other.epsilon;
00180         maxSteps = other.maxSteps;
00181         
00182         return *this;
00183       };
00184 
00185       /**
00186        * copy the contents of a parameters object
00187        * @param other the parameters object to be copied
00188        * @return a reference to this parameters object
00189        */
00190       parameters& operator=(const parameters& other) {
00191         return copy(other);
00192       };
00193 
00194       /**
00195        * returns a pointer to a clone of the parameters
00196        */
00197       virtual functor::parameters* clone() const {
00198         return new parameters(*this);
00199       };
00200 
00201 #     ifndef _LTI_MSC_6
00202       /**
00203        * write the parameters in the given ioHandler
00204        * @param handler the ioHandler to be used
00205        * @param complete if true (the default) the enclosing begin/end will
00206        *        be also written, otherwise only the data block will be written.
00207        * @return true if write was successful
00208        */
00209       virtual bool write(ioHandler& handler,const bool complete=true) const
00210 #     else
00211         /**
00212          * this function is required by MSVC only, as a workaround for a
00213          * very awful bug, which exists since MSVC V.4.0, and still by
00214          * V.6.0 with all bugfixes (so called "service packs") remains
00215          * there...  This method is also public due to another bug, so please
00216          * NEVER EVER call this method directly: use write() instead
00217          */
00218         bool writeMS(ioHandler& handler,const bool complete=true) const
00219 #     endif
00220         {
00221           bool b = true;
00222           if (complete) {
00223             b = handler.writeBegin();
00224           }
00225         
00226           if (b) {
00227           
00228             lti::write(handler,"costMatrix",costMatrix);
00229             lti::write(handler,"epsilon",epsilon);
00230             lti::write(handler,"maxSteps",maxSteps);
00231           }
00232 
00233 #     ifndef _LTI_MSC_6
00234           // This is the standard C++ code, which MS Visual C++ 6 is not able to
00235           // compile...
00236           b = b && distanceFunctor<W>::parameters::write(handler,false);
00237 #     else
00238           bool (distanceFunctor<W>::parameters::* p_writeMS)(ioHandler&,
00239                                                              const bool) const =
00240             distanceFunctor<W>::parameters::writeMS;
00241           b = b && (this->*p_writeMS)(handler,false);
00242 #     endif
00243 
00244           if (complete) {
00245             b = b && handler.writeEnd();
00246           }
00247 
00248           return b;
00249         }
00250 
00251 #     ifdef _LTI_MSC_6
00252       /**
00253        * write the parameters in the given ioHandler
00254        * @param handler the ioHandler to be used
00255        * @param complete if true (the default) the enclosing begin/end will
00256        *        be also written, otherwise only the data block will be written.
00257        * @return true if write was successful
00258        */
00259       bool write(ioHandler& handler,
00260                  const bool complete=true) const {
00261         // ...we need this workaround to cope with another really
00262         // awful MSVC bug.
00263         return writeMS(handler,complete);
00264       }
00265 #     endif
00266 
00267 
00268 #     ifndef _LTI_MSC_6
00269       /**
00270        * read the parameters from the given ioHandler
00271        * @param handler the ioHandler to be used
00272        * @param complete if true (the default) the enclosing begin/end will
00273        *        be also written, otherwise only the data block will be written.
00274        * @return true if write was successful
00275        */
00276       virtual bool read(ioHandler& handler,const bool complete=true)
00277 #     else
00278         /**
00279          * this function is required by MSVC only, as a workaround for a
00280          * very awful bug, which exists since MSVC V.4.0, and still by
00281          * V.6.0 with all bugfixes (so called "service packs") remains
00282          * there...  This method is also public due to another bug, so please
00283          * NEVER EVER call this method directly: use read() instead
00284          */
00285         bool readMS(ioHandler& handler,const bool complete=true)
00286 #     endif
00287         {
00288           bool b = true;
00289           if (complete) {
00290             b = handler.readBegin();
00291           }
00292         
00293           if (b) {
00294           
00295             lti::read(handler,"costMatrix",costMatrix);
00296             lti::read(handler,"epsilon",epsilon);
00297             lti::read(handler,"maxSteps",maxSteps);
00298           }
00299 
00300 #       ifndef _LTI_MSC_6
00301           // This is the standard C++ code, which MS Visual C++ 6 is not
00302           // able to compile...
00303           b = b && distanceFunctor<W>::parameters::read(handler,false);
00304 #       else
00305           bool (distanceFunctor<W>::parameters::* p_readMS)(ioHandler&,
00306                                                             const bool) =
00307             distanceFunctor<W>::parameters::readMS;
00308           b = b && (this->*p_readMS)(handler,false);
00309 #       endif
00310 
00311           if (complete) {
00312             b = b && handler.readEnd();
00313           }
00314         
00315           return b;
00316         }
00317 
00318 #     ifdef _LTI_MSC_6
00319       /**
00320        * read the parameters from the given ioHandler
00321        * @param handler the ioHandler to be used
00322        * @param complete if true (the default) the enclosing begin/end will
00323        *        be also written, otherwise only the data block will be written.
00324        * @return true if write was successful
00325        */
00326       bool read(ioHandler& handler,const bool complete=true) {
00327         // ...we need this workaround to cope with another really awful MSVC
00328         // bug.
00329         return readMS(handler,complete);
00330       }
00331 #     endif
00332 
00333       // ------------------------------------------------
00334       // the parameters
00335       // ------------------------------------------------
00336 
00337       //TODO: comment the parameters of your functor
00338       // If you add more parameters manually, do not forget to do following:
00339       // 1. indicate in the default constructor the default values
00340       // 2. make sure that the copy member also copy your new parameters
00341       // 3. make sure that the read and write members also read and
00342       //    write your parameters
00343       
00344 
00345       /**
00346        * Usually the template type D (for distance) of the
00347        * earthMoversDistance is used to calculate the ground
00348        * distance. For some data type, espacially when comparing
00349        * feature vectors, it is difficult to find a distance measure
00350        * which describes the knowledge of relationship between the
00351        * elements of the feature vector correctly. For these cases
00352        * give the costMatrix explicitly here. The ground distance
00353        * measure will be disregarded for the calculations. Note that
00354        * for distances between vectors or matrices of weights the
00355        * diagonal is usually 0 (no cost to move weight between equal
00356        * indices). Default empty.
00357        */
00358       matrix<W> costMatrix;
00359       
00360       /**
00361        * A small value which is equal to the accepted rounding error
00362        * per operation. Default 1E-6.
00363        */
00364       W epsilon;
00365 
00366       /**
00367        * maximum number of steps to find the optimal flow with the
00368        * least cost. Default 500.
00369        */
00370       int maxSteps;
00371     };
00372 
00373     /**
00374      * value type of the cluster type C
00375      */
00376     typedef typename C::value_type cluster_value_type;
00377 
00378     /**
00379      * default constructor
00380      */
00381     earthMoversDistance();
00382 
00383     /**
00384      * Construct a functor using the given parameters
00385      */
00386     earthMoversDistance(const parameters& par);
00387 
00388     /**
00389      * copy constructor
00390      * @param other the object to be copied
00391      */
00392     earthMoversDistance(const earthMoversDistance& other);
00393 
00394     /**
00395      * destructor
00396      */
00397     virtual ~earthMoversDistance();
00398 
00399     /**
00400      * returns the name of this type ("earthMoversDistance")
00401      */
00402     virtual const char* getTypeName() const;
00403 
00404     /**
00405      * copy data of "other" functor.
00406      * @param other the functor to be copied
00407      * @return a reference to this functor object
00408      */
00409     earthMoversDistance& copy(const earthMoversDistance& other);
00410 
00411     /**
00412      * alias for copy member
00413      * @param other the functor to be copied
00414      * @return a reference to this functor object
00415      */
00416     earthMoversDistance& operator=(const earthMoversDistance& other);
00417 
00418     /**
00419      * returns a pointer to a clone of this functor.
00420      */
00421     virtual functor* clone() const;
00422 
00423     /**
00424      * returns used parameters
00425      */
00426     const parameters& getParameters() const;
00427 
00428     /**
00429      * Calculation of norms not available for Earth Mover's Distance!
00430      * @throws lti::functor::invalidMethodException
00431      */
00432     virtual bool apply(const vector<W>& v, W& norm) const {
00433       throw invalidMethodException("Calculation of norms not available for Earth Mover's Distance!");
00434     };
00435 
00436     /**
00437      * Calculation of norms not available for Earth Mover's Distance!
00438      * @throws lti::functor::invalidMethodException
00439      */
00440     virtual W apply(const vector<W>& v) const {
00441       throw invalidMethodException("Calculation of norms not available for Earth Mover's Distance!");
00442     };
00443 
00444     /**
00445      * Calculation of norms not available for Earth Mover's Distance!
00446      * @throws lti::functor::invalidMethodException
00447      */
00448     virtual bool apply(const matrix<W>& m, vector<W>& norms) const {
00449       throw invalidMethodException("Calculation of norms not available for Earth Mover's Distance!");
00450     };
00451 
00452     /**
00453      * Calculation of norms not available for Earth Mover's Distance!
00454      * @throws lti::functor::invalidMethodException
00455      */
00456     virtual bool apply(const matrix<W>& m, W& norm) const {
00457       throw invalidMethodException("Calculation of norms not available for Earth Mover's Distance!");
00458     };
00459 
00460     /**
00461      * Calculation of norms not available for Earth Mover's Distance!
00462      * @throws lti::functor::invalidMethodException
00463      */
00464     virtual W apply(const matrix<W>& m) const {
00465       throw invalidMethodException("Calculation of norms not available for Earth Mover's Distance!");
00466     };
00467 
00468 
00469     /**
00470      * calculate the distance between the vectors a and b
00471      * @param a the first vector<W>
00472      * @param b the second vector<W>
00473      * @param dist the distance between the vectors
00474      * @return false on error -> see status string
00475      */
00476     virtual bool apply(const vector<W>& a, const vector<W>& b,
00477                        W& dist) const;
00478 
00479     /**
00480      * calculate the distance and the flow between the vectors a and b.
00481      * @param a the first vector<W>
00482      * @param b the second vector<W>
00483      * @param dist the distance between the vectors
00484      * @param flow the flow matrix for \a a and \a b
00485      * @return false on error -> see status string
00486      */
00487     virtual bool apply(const vector<W>& a, const vector<W>& b,
00488                        W& dist, matrix<W>& flow) const {
00489       bool ok=apply(a,b,dist);
00490       ok = ok && findFlow(a.size(), b.size(), flow);
00491       return ok;
00492     }
00493 
00494     /**
00495      * calculate the distance between the vectors a and b
00496      * @param a the first vector<W>
00497      * @param b the second vector<W>
00498      * @return the distance between the vectors or zero if calculation failed.
00499      */
00500     virtual W apply(const vector<W>& a, const vector<W>& b) const {
00501       W tmp;
00502       if (apply(a,b,tmp)) {
00503         return tmp;
00504       } else {
00505         return W(0);
00506       }
00507     };
00508     
00509     /**
00510      * calculate the distances between the rows or columns of the
00511      * matrices a and b, determined by the parameters rowWise.
00512      * @param a the first vector<W>
00513      * @param b the second vector<W>
00514      * @param dists the distances between the matrices
00515      * @return false on error -> see status string
00516      */
00517     virtual bool apply(const matrix<W>& a, const matrix<W>& b,
00518                        vector<W>& dists) const;
00519 
00520     /**
00521      * Calculate the distance between each row or column of m
00522      * depending on the value of rowWise and the vector v.
00523      * @param m the matrix<W>
00524      * @param v the vector to be compared with
00525      * @param dest the vector with the distances to the vector v
00526      * @return false on error
00527      */
00528     virtual bool apply(const matrix<W>& m, const vector<W>& v,
00529                        vector<W>& dest) const;
00530 
00531     /**
00532      * calculate something like the distance between the matrices a and b:
00533      * both matrices are seen as vectors.
00534      * @param a the first matrix<W>
00535      * @param b the second matrix<W>
00536      * @param dist the 'distance' between the matrices
00537      * @return false on error -> see status string
00538      */
00539     virtual bool apply(const matrix<W>& a, const matrix<W>& b,
00540                        W& dist) const;
00541 
00542     /**
00543      * calculate something like the distance and the flow between the
00544      * matrices a and b: both matrices are seen as vectors.
00545      * @param a the first matrix<W>
00546      * @param b the second matrix<W>
00547      * @param dist the 'distance' between the matrices
00548      * @param flow the flow matrix for \a a and \a b
00549      * @return false on error -> see status string
00550      */
00551     virtual bool apply(const matrix<W>& a, const matrix<W>& b,
00552                        W& dist, matrix<W>& flow) const {
00553       bool ok=apply(a,b,dist);
00554       ok = ok && findFlow(a.rows()*a.columns(), b.rows()*b.columns(), flow);
00555       return ok;
00556     }
00557 
00558     /**
00559      * calculate something like the distance between the matrices a and b:
00560      * both matrices are seen as vectors.
00561      * @param a the first matrix<W>
00562      * @param b the second matrix<W>
00563      * @return the 'distance' between the matrices, if calculation fails
00564      *         0 is returned.
00565      */
00566     virtual W apply(const matrix<W>& a, const matrix<W>& b) const {
00567       W tmp;
00568       if (apply(a,b,tmp)) {
00569         return tmp;
00570       } else {
00571         return W(0);
00572       }
00573 
00574     };
00575 
00576     /**
00577      * calculate the EMD between to distributions each defined by its
00578      * clusters and the respective weights.
00579      * @param clusters1 each element of the vector contains a centroid of
00580      *                  a cluster of distribution 1
00581      * @param clusters2 each element of the vector contains a centroid of
00582      *                  a cluster of distribution 2
00583      * @param weights1 the weights corresponding to \a clusters1
00584      * @param weights2 the weights corresponding to \a clusters2
00585      * @param dist the EMD between the distributions
00586      * @return false if operation fails.
00587      */
00588     virtual bool apply(const std::vector<C>& clusters1,
00589                        const std::vector<C>& clusters2,
00590                        const vector<W>& weights1,
00591                        const vector<W>& weights2,
00592                        W& dist) const;
00593 
00594     /**
00595      * calculate the EMD and the flow between to distributions each
00596      * defined by its clusters and the respective weights.
00597      * @param clusters1 each element of the vector contains a centroid of
00598      *                  a cluster of distribution 1
00599      * @param clusters2 each element of the vector contains a centroid of
00600      *                  a cluster of distribution 2
00601      * @param weights1 the weights corresponding to \a clusters1
00602      * @param weights2 the weights corresponding to \a clusters2
00603      * @param dist the EMD between the distributions
00604      * @param flow the flow matrix for the two distributions
00605      * @return false if operation fails.
00606      */
00607     virtual bool apply(const std::vector<C>& clusters1,
00608                        const std::vector<C>& clusters2,
00609                        const vector<W>& weights1,
00610                        const vector<W>& weights2,
00611                        W& dist, matrix<W>& flow) const {
00612       bool ok=apply(clusters1, clusters2, weights1, weights2, dist);
00613       ok = ok && findFlow(weights1.size(), weights2.size(), flow);
00614       return ok;
00615     }
00616 
00617     /**
00618      * calculate the EMD between to distributions each defined by its
00619      * clusters and the respective weights.
00620      * @param clusters1 each element of the vector contains a centroid of
00621      *                  a cluster of distribution 1
00622      * @param clusters2 each element of the vector contains a centroid of
00623      *                  a cluster of distribution 2
00624      * @param weights1 the weights corresponding to \a clusters1
00625      * @param weights2 the weights corresponding to \a clusters2
00626      * @return the EMD between the distributions, if calculation fails
00627      *         0 is returned.
00628      */
00629     virtual W apply(const std::vector<C>& clusters1,
00630                     const std::vector<C>& clusters2,
00631                     const vector<W>& weights1,
00632                     const vector<W>& weights2) const {
00633       W tmp;
00634       if (apply(clusters1, clusters2, weights1, weights2, tmp)) {
00635         return tmp;
00636       } else {
00637         return W(0);
00638       }
00639     };
00640 
00641     /**
00642      * calculate the EMD between to distributions each defined by its
00643      * clusters and the respective weights.
00644      * @param clusters1 each row of the matrix contains a centroid of
00645      *                  a cluster of distribution 1
00646      * @param clusters2 each row of the matrix contains a centroid of
00647      *                  a cluster of distribution 2
00648      * @param weights1 the weights corresponding to \a clusters1
00649      * @param weights2 the weights corresponding to \a clusters2
00650      * @param dist the EMD between the distributions
00651      * @return false if operation fails.
00652      */
00653     virtual bool apply(const matrix<cluster_value_type>& clusters1,
00654                        const matrix<cluster_value_type>& clusters2,
00655                        const vector<W>& weights1,
00656                        const vector<W>& weights2,
00657                        W& dist) const;
00658 
00659     /**
00660      * calculate the EMD and the flow between to distributions each
00661      * defined by its clusters and the respective weights.
00662      * @param clusters1 each row of the matrix contains a centroid of
00663      *                  a cluster of distribution 1
00664      * @param clusters2 each row of the matrix contains a centroid of
00665      *                  a cluster of distribution 2
00666      * @param weights1 the weights corresponding to \a clusters1
00667      * @param weights2 the weights corresponding to \a clusters2
00668      * @param dist the EMD between the distributions
00669      * @param flow the flow matrix for the two distributions
00670      * @return false if operation fails.
00671      */
00672     virtual bool apply(const matrix<cluster_value_type>& clusters1,
00673                        const matrix<cluster_value_type>& clusters2,
00674                        const vector<W>& weights1,
00675                        const vector<W>& weights2,
00676                        W& dist, matrix<W>& flow) const {
00677       bool ok=apply(clusters1, clusters2, weights1, weights2, dist);
00678       ok = ok && findFlow(weights1.size(), weights2.size(), flow);
00679       return ok;
00680     }
00681 
00682     /**
00683      * calculate the EMD between to distributions each defined by its
00684      * clusters and the respective weights.
00685      * @param clusters1 each row of the matrix contains a centroid of
00686      *                  a cluster of distribution 1
00687      * @param clusters2 each row of the matrix contains a centroid of
00688      *                  a cluster of distribution 2
00689      * @param weights1 the weights corresponding to \a clusters1
00690      * @param weights2 the weights corresponding to \a clusters2
00691      * @return the EMD between the distributions, if calculation fails
00692      *         0 is returned.
00693      */
00694     virtual W apply(const matrix<cluster_value_type>& clusters1,
00695                     const matrix<cluster_value_type>& clusters2,
00696                     const vector<W>& weights1,
00697                     const vector<W>& weights2) const {
00698       W tmp;
00699       if (apply(clusters1, clusters2, weights1, weights2, tmp)) {
00700         return tmp;
00701       } else {
00702         return W(0);
00703       }
00704     };
00705 
00706   protected:
00707 
00708     /**
00709      * type for a single linked list
00710      */
00711     typedef struct single_node {
00712       int i;
00713       W val;
00714       single_node* next;
00715     } single_node;
00716 
00717     /**
00718      * type for a double linked list
00719      */
00720     typedef struct double_node {
00721       int i,j;
00722       W val;
00723       double_node* nextC;
00724       double_node* nextR;
00725     } double_node;
00726 
00727     /** the number of clusters in the first distribution */
00728     mutable int sz1;
00729     /** the number of clusters in the second distribution */
00730     mutable int sz2;
00731 
00732     /**
00733      * The cost matrix. If parameters::costMatrix is used, this matrix
00734      * uses the same data, else the costMatrix is calculated using the
00735      * ground distance D.
00736      */
00737     mutable matrix<W> costMatrix;
00738 
00739     /**
00740      * The maximum value in the costMatrix
00741      */
00742     mutable W maxCost;
00743 
00744     /**
00745      * The maximum of the total weights of supply and demand
00746      */
00747     mutable W maxWeight;
00748 
00749     /**
00750      * The minimum of the total weights of supply and demand
00751      */
00752     mutable W minWeight;
00753 
00754     /**
00755      * Datastructure containing the basic variables.
00756      */
00757     mutable double_node* basicVars;
00758 
00759     /**
00760      * Pointers from first distribution into the basicVars array.
00761      */
00762     mutable double_node** distr1Basic;
00763 
00764     /**
00765      * Pointers from second distribution into the basicVars array.
00766      */
00767     mutable double_node** distr2Basic;
00768 
00769     /** pointer for efficient handling of basicVars */
00770     mutable double_node* endBasicVars;
00771 
00772     /** pointer for efficient handling of basicVars */
00773     mutable double_node* enterBasicVars;
00774 
00775     /**
00776      * If true the two clusters i and j of the two distributions form
00777      * a basic variable.
00778      */
00779     mutable genericMatrix<bool> isBasicVar;
00780 
00781     /**
00782      * inits data structures. returns true if the sum of weights
00783      * (earth) is significantly different in the two distributions. If
00784      * this is the case the weight vector with less weight gets an
00785      * extra entry with the difference.
00786      */
00787     bool init(vector<W>& weights1, vector<W>& weights2) const;
00788 
00789 
00790     /**
00791      * Finds the costMatrix for the given supply and demand. 
00792      */
00793     bool calcCost(const ivector& supply, const ivector& demand) const;
00794 
00795     /**
00796      * Finds the costMatrix for the given supply and demand. 
00797      */
00798     bool calcCost(const vector<point>& supply,
00799                   const vector<point>& demand) const;
00800 
00801     /**
00802      * Finds the costMatrix for the given supply and demand. 
00803      */
00804     bool calcCost(const std::vector<C>& supply,
00805                   const std::vector<C>& demand) const;
00806 
00807     /**
00808      * Finds the costMatrix for the given supply and demand. 
00809      */
00810     bool calcCost(const matrix<cluster_value_type>& supply, 
00811                   const matrix<cluster_value_type>& demand) const;
00812 
00813     /**
00814      * Actual management of the calculation of the emd.  Parameters
00815      * are the two weights vectors as well as their original
00816      * lengths. The total flow is returned.
00817      */
00818     bool emd(vector<W>& weights1, vector<W>& weights2,
00819              const int& slen, const int& dlen, W& totalFlow) const;
00820 
00821 
00822     /**
00823      * Initial solution of the transportation problem on the given weights.
00824      */
00825     bool russel(vector<W>& weights1, vector<W>& weights2) const;
00826 
00827     /**
00828      * Helper method for russel
00829      */
00830     void addBasicVariable(int minI, int minJ, 
00831                           vector<W>& weights1, vector<W>& weights2,
00832                           single_node *PrevUMinI, single_node *PrevVMinJ,
00833                           single_node *UHead) const;
00834 
00835     /**
00836      * Extracts the basic variables from isBasisVar
00837      */
00838     void findBasicVariables(single_node *U, single_node *V) const;
00839 
00840     /**
00841      * Checks if the current solution is optimal.
00842      */
00843     int isOptimal(single_node *U, single_node *V) const;
00844 
00845     /**
00846      * Find a new/better solution to the transportation problem
00847      */
00848     void newSolution() const;
00849     
00850     /**
00851      * Tries to find a loop in the current solution
00852      */
00853     int findLoop(double_node **Loop) const;
00854 
00855     /**
00856      * Constructs a matrix containing the flow from distribution1 to
00857      * distribution2. Where the first is in rows and the second in
00858      * columns.
00859      */
00860     bool findFlow(const int& slen, const int& dlen, matrix<W>& flow) const;
00861 
00862 #ifdef _LTI_DEBUG  
00863     void dumpVariables() const;
00864 #endif
00865     
00866   };
00867 
00868 }
00869 
00870 #include "ltiUndebug.h"
00871 #include "ltiEarthMoversDistance_template.h"
00872 
00873 #endif
00874 
00875   

Generated on Sat Apr 10 15:25:24 2010 for LTI-Lib by Doxygen 1.6.1