/*
 * Copyright (C) 2002, 2003, 2004, 2005, 2006
 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
 *
 * This file is part of the LTI-Computer Vision Library (LTI-Lib)
 *
 * The LTI-Lib is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License (LGPL)
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * The LTI-Lib is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the LTI-Lib; see the file LICENSE.  If
 * not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */


/*--------------------------------------------------------------------
 * project ....: LTI-Lib: Image Processing and Computer Vision Library
 * file .......: ltiMLP.h
 * authors ....: Bastian Ibach, Pablo Alvarado
 * organization: LTI, RWTH Aachen
 * creation ...: 13.8.2002
 * revisions ..: $Id: ltiMLP.h,v 1.7 2007/01/10 02:25:44 alvarado Exp $
 */

#ifndef _LTI_M_L_P_H_
#define _LTI_M_L_P_H_

#include "ltiSupervisedInstanceClassifier.h"
#include "ltiVector.h"
#include "ltiObjectFactory.h"
#include <vector>

namespace lti {

  /**
   * Multi-layer perceptrons
   *
   * This class implements multi-layer neural networks using different
   * training methods.
   *
   * A number of layers between 1 and 3 is allowed.
   *
   * Training methods implemented at this time are:
   * - generalized delta-rule (steepest descent) with and without momentum,
   * - conjugate gradients.
   *
   * The following example shows how to use this classifier:
   *
   * \code
   * double inData[] = {-1,-1,
   *                    -1, 0,
   *                    -1,+1,
   *                    +0,+1,
   *                    +1,+1,
   *                    +1,+0,
   *                    +1,-1,
   *                    +0,-1,
   *                    +0,+0};
   * lti::dmatrix inputs(9,2,inData);     // training vectors
   *
   * int idsData[] = {1,0,1,0,1,0,1,0,1}; // and the respective ids
   * lti::ivector ids(9,idsData);
   *
   * lti::MLP ann;  // our artificial neural network
   *
   * lti::MLP::parameters param;
   * lti::MLP::sigmoidFunctor sigmoid(1);
   * param.setLayers(4,sigmoid);  // two layers with four hidden units
   * param.trainingMode = lti::MLP::parameters::ConjugateGradients;
   * param.maxNumberOfEpochs=200;
   * ann.setParameters(param);
   *
   * // we want to see some info while training
   * streamProgressInfo prog(std::cout);
   * ann.setProgressObject(prog);
   *
   * // train the network
   * ann.train(inputs,ids);
   *
   * // let us save our network for future use
   * // in the file called mlp.dat
   * std::ofstream out("mlp.dat");
   * lti::lispStreamHandler lsh(out);
   *
   * // save the network
   * ann.write(lsh);
   * // close the file
   * out.close();
   *
   * // show some results with the same training set:
   *
   * lti::MLP::outputVector outv; // here we will get some
   *                              // classification results
   * cout << endl << "Results: " << endl;
   *
   * int i,id;
   * for (i=0;i<inputs.rows();++i) {
   *   ann.classify(inputs.getRow(i),outv);
   *   cout << "Input " << inputs.getRow(i) << " \tOutput: ";
   *   outv.getId(outv.getWinnerUnit(),id);
   *   cout << id;
   *   if (id != ids.at(i)) {
   *     cout << " <- should be " << ids.at(i);
   *   }
   *   cout << endl;
   * }
   * \endcode
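   *
   * A saved network can later be restored from the same file.  A minimal
   * sketch (assuming mlp.dat was written as above):
   *
   * \code
   * std::ifstream in("mlp.dat");
   * lti::lispStreamHandler lsh(in);
   *
   * lti::MLP ann;
   * ann.read(lsh);  // restore parameters and trained weights
   * in.close();
   * \endcode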
   *
   * Better display for the classification of 2D problems can be
   * generated using the functor lti::classifier2DVisualizer.
   */
  class MLP : public supervisedInstanceClassifier {
  public:

    // ----------------------------------------------
    // activation functor
    // ----------------------------------------------

    /**
     * Parent class for all activation function functors
     */
    class activationFunctor : public object {
    public:
      /**
       * The functor operator.  Operates in place, applying the function
       * to each element of the vector.
       */
      virtual bool apply(dvector& output) const = 0;

      /**
       * The functor operator.  Operates on a copy.
       */
      virtual bool apply(const dvector& src, dvector& output) const = 0;

      /**
       * The derivative of the functor
       */
      virtual bool deriv(dvector& output) const = 0;

      /**
       * Return a copy of this functor
       */
      virtual activationFunctor* clone() const = 0;

      /**
       * Return the value used to represent "true" or "on"
       */
      virtual const double& onValue() const = 0;

      /**
       * Return the value used to represent "false" or "off"
       */
      virtual const double& offValue() const = 0;

      /**
       * write the parameters in the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be written, otherwise only the data block will be
       *        written.
       * @return true if write was successful
       */
      virtual bool write(ioHandler& handler,const bool complete=true) const {
        bool b = true;
        if (complete) {
          b = b && handler.writeBegin();
          b = b && handler.writeEnd();
        }
        return b;
      }

      /**
       * read the parameters from the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be read, otherwise only the data block will be read.
       * @return true if read was successful
       */
      virtual bool read(ioHandler& handler,const bool complete=true) {
        bool b = true;
        if (complete) {
          b = b && handler.readBegin();
          b = b && handler.readEnd();
        }
        return b;
      }
    };
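
    /*
     * A new activation function can be plugged in by deriving from
     * activationFunctor.  The following sketch of a hypothetical
     * hyperbolic-tangent functor (tanhFunctor is NOT part of the
     * library) illustrates the interface.  Note that deriv() receives
     * the net values and must leave the derivative in the same vector:
     *
     *   class tanhFunctor : public lti::MLP::activationFunctor {
     *   public:
     *     // in-place application of tanh to each element
     *     virtual bool apply(dvector& output) const {
     *       dvector::iterator it,eit;
     *       for (it=output.begin(),eit=output.end();it!=eit;++it) {
     *         (*it) = tanh(*it);
     *       }
     *       return true;
     *     };
     *     // on-copy application, reusing the in-place version
     *     virtual bool apply(const dvector& src, dvector& output) const {
     *       output.copy(src);
     *       return apply(output);
     *     };
     *     // derivative: d/dx tanh(x) = 1 - tanh^2(x)
     *     virtual bool deriv(dvector& output) const {
     *       dvector::iterator it,eit;
     *       for (it=output.begin(),eit=output.end();it!=eit;++it) {
     *         const double t = tanh(*it);
     *         (*it) = 1.0 - t*t;
     *       }
     *       return true;
     *     };
     *     virtual activationFunctor* clone() const {return new tanhFunctor;};
     *     virtual const double& onValue() const {
     *       static const double theOnValue = +1.0;
     *       return theOnValue;
     *     };
     *     virtual const double& offValue() const {
     *       static const double theOffValue = -1.0;
     *       return theOffValue;
     *     };
     *   };
     */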

    /**
     * A linear activation function
     */
    class linearActFunctor : public activationFunctor {
    public:
      /**
       * The functor operator
       */
      virtual bool apply(dvector& output) const {return true;};

      /**
       * The functor operator.  Operates on a copy.
       */
      virtual bool apply(const dvector& src, dvector& output) const {
        output.copy(src);
        return true;
      };

      /**
       * The derivative of the functor
       */
      virtual bool deriv(dvector& output) const {
        output.fill(1.0);
        return true;
      };

      /**
       * Return the value used to represent "true" or "on"
       */
      virtual const double& onValue() const {
        static const double theOnValue = +1.0;
        return theOnValue;
      };

      /**
       * Return the value used to represent "false" or "off"
       */
      virtual const double& offValue() const {
        static const double theOffValue = -1.0;
        return theOffValue;
      };

      /**
       * Return a copy of this functor
       */
      virtual activationFunctor* clone() const {return new linearActFunctor;};
    };

    /**
     * A sign activation function (1.0 if input 0 or positive, -1.0
     * otherwise)
     */
    class signFunctor : public activationFunctor {
    public:
      /**
       * The functor operator
       */
      virtual bool apply(dvector& output) const {
        dvector::iterator it,eit;
        for (it=output.begin(),eit=output.end();it!=eit;++it) {
          (*it) = (*it) >= 0 ? 1.0 : -1.0;
        }
        return true;
      };

      /**
       * The functor operator.  Operates on a copy.
       */
      virtual bool apply(const dvector& src, dvector& output) const {
        dvector::const_iterator it,eit;
        dvector::iterator oit;
        output.resize(src.size(),0.0,false,false);
        for (it=src.begin(),eit=src.end(),oit=output.begin();
             it!=eit;
             ++it,++oit) {
          (*oit) = (*it) >= 0 ? 1.0 : -1.0;
        }
        return true;
      };

      /**
       * The derivative of the functor (will return 1.0 to allow learning)
       */
      virtual bool deriv(dvector& output) const {
        output.fill(1.0);
        return true;
      };

      /**
       * Return a copy of this functor
       */
      virtual activationFunctor* clone() const {return new signFunctor;};

      /**
       * Return the value used to represent "true" or "on"
       */
      virtual const double& onValue() const {
        static const double theOnValue = +1.0;
        return theOnValue;
      };

      /**
       * Return the value used to represent "false" or "off"
       */
      virtual const double& offValue() const {
        static const double theOffValue = -1.0;
        return theOffValue;
      };
    };

    /**
     * A sigmoid activation function
     */
    class sigmoidFunctor : public activationFunctor {
    protected:
      double slope;
    public:
      /**
       * Constructor using an explicit sigmoid slope
       */
      sigmoidFunctor(const double& theSlope):
        slope(theSlope) {};

      /**
       * Constructor of a sigmoid with slope 1.0
       */
      sigmoidFunctor() : slope(1.0) {};

      /**
       * The functor operator
       */
      virtual bool apply(dvector& output) const {
        dvector::iterator it,eit;
        for (it=output.begin(),eit=output.end();it!=eit;++it) {
          (*it) = 1.0/(1.0+exp(-(*it)*slope));
        }
        return true;
      };

      /**
       * The functor operator.  Operates on a copy.
       */
      virtual bool apply(const dvector& src, dvector& output) const {
        dvector::const_iterator it,eit;
        dvector::iterator oit;
        output.resize(src.size(),0.0,false,false);
        for (it=src.begin(),eit=src.end(),oit=output.begin();
             it!=eit;
             ++it,++oit) {
          (*oit) = 1.0/(1.0+exp(-(*it)*slope));
        }
        return true;
      };

      /**
       * The derivative of the functor
       */
      virtual bool deriv(dvector& output) const {
        dvector::iterator it,eit;
        for (it=output.begin(),eit=output.end();it!=eit;++it) {
          (*it) = 1.0/(1.0+exp(-(*it)*slope));
          (*it) = (*it)*(1.0-(*it))*slope;
        }
        return true;
      };

      /**
       * Return a copy of this functor
       */
      virtual activationFunctor* clone() const {
        return new sigmoidFunctor(slope);
      };

      /**
       * Return the value used to represent "true" or "on"
       */
      virtual const double& onValue() const {
        static const double theOnValue = +1.0;
        return theOnValue;
      };

      /**
       * Return the value used to represent "false" or "off"
       */
      virtual const double& offValue() const {
        static const double theOffValue = 0.0;
        return theOffValue;
      };

      /**
       * write the parameters in the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be written, otherwise only the data block will be
       *        written.
       * @return true if write was successful
       */
      virtual bool write(ioHandler& handler,const bool complete=true) const {
        bool b = true;
        if (complete) {
          b = b && handler.writeBegin();
        }
        b = b && lti::write(handler,"slope",slope);
        if (complete) {
          b = b && handler.writeEnd();
        }
        return b;
      }

      /**
       * read the parameters from the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be read, otherwise only the data block will be read.
       * @return true if read was successful
       */
      virtual bool read(ioHandler& handler,const bool complete=true) {
        bool b = true;
        if (complete) {
          b = b && handler.readBegin();
        }
        b = b && lti::read(handler,"slope",slope);
        if (complete) {
          b = b && handler.readEnd();
        }
        return b;
      }
    };
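
    /*
     * Note on sigmoidFunctor::deriv(): for the sigmoid
     *
     *   f(x) = 1 / (1 + exp(-slope*x))
     *
     * the derivative can be expressed in terms of the function value
     * itself,
     *
     *   f'(x) = slope * f(x) * (1 - f(x)),
     *
     * which is exactly what deriv() computes above: it first evaluates f
     * on the given net values and then applies this identity.
     */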

    /**
     * The parameters for the class MLP
     */
    class parameters : public supervisedInstanceClassifier::parameters {
    private:
      /**
       * Activation functors
       */
      static const activationFunctor *const activationFunctors[];

      /**
       * Factory to create activation functors
       */
      static objectFactory<activationFunctor> objFactory;

    public:
      /**
       * default constructor
       */
      parameters();

      /**
       * copy constructor
       * @param other the parameters object to be copied
       */
      parameters(const parameters& other);

      /**
       * destructor
       */
      virtual ~parameters();

      /**
       * returns the name of this type
       */
      const char* getTypeName() const;

      /**
       * copy the contents of a parameters object
       * @param other the parameters object to be copied
       * @return a reference to this parameters object
       */
      parameters& copy(const parameters& other);

      /**
       * copy the contents of a parameters object
       * @param other the parameters object to be copied
       * @return a reference to this parameters object
       */
      parameters& operator=(const parameters& other);

      /**
       * returns a pointer to a clone of the parameters
       */
      virtual classifier::parameters* clone() const;

      /**
       * write the parameters in the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be written, otherwise only the data block will be
       *        written.
       * @return true if write was successful
       */
      virtual bool write(ioHandler& handler,const bool complete=true) const;

      /**
       * read the parameters from the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be read, otherwise only the data block will be read.
       * @return true if read was successful
       */
      virtual bool read(ioHandler& handler,const bool complete=true);

#     ifdef _LTI_MSC_6
      /**
       * this function is required by MSVC only, as a workaround for a
       * very awful bug that has existed since MSVC V.4.0 and still
       * remains in V.6.0 with all its bugfixes (the so-called "service
       * packs").  This method is also public due to another bug, so
       * please NEVER EVER call this method directly: use read() instead.
       */
      bool readMS(ioHandler& handler,const bool complete=true);

      /**
       * this function is required by MSVC only, as a workaround for a
       * very awful bug that has existed since MSVC V.4.0 and still
       * remains in V.6.0 with all its bugfixes (the so-called "service
       * packs").  This method is also public due to another bug, so
       * please NEVER EVER call this method directly: use write() instead.
       */
      bool writeMS(ioHandler& handler,const bool complete=true) const;
#     endif

      /**
       * Initialize the parameters to create an MLP with two layers
       * with the given number of hidden units (the number of
       * input and output units is determined in the training stage).
       *
       * @param hidden number of hidden units
       * @param activ activation function to be used in all units
       */
      bool setLayers(const int hidden,
                     const activationFunctor& activ);

      /**
       * Initialize the parameters to create an MLP with one single layer
       * (the number of input and output units is determined in the
       * training stage).
       *
       * @param activ activation function to be used in all units
       */
      bool setLayers(const activationFunctor& activ);

      /**
       * Set the activation functor for a given layer.  The current number
       * of layers is determined by the size of the \a hiddenUnits
       * attribute.
       *
       * @param layer number of the layer
       * @param aFct activationFunctor
       * @return true if successful, or false otherwise (usually because
       *         the layer number is invalid).
       */
      bool setLayerActivation(const int layer, const activationFunctor& aFct);
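
      /*
       * The layer structure is configured most easily via setLayers()
       * and setLayerActivation().  A sketch for a two-layer network with
       * ten hidden units, a sigmoid hidden layer and a linear output
       * layer (layer indices are assumed to start at 0):
       *
       *   lti::MLP::parameters param;
       *   lti::MLP::sigmoidFunctor sigmoid;
       *   lti::MLP::linearActFunctor linear;
       *   param.setLayers(10,sigmoid);        // two layers, 10 hidden units
       *   param.setLayerActivation(1,linear); // linear output layer
       */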

      // ------------------------------------------------
      // the parameters
      // ------------------------------------------------

      /**
       * Training type
       */
      enum eTrainingType {
        SteepestDescent,   /**< Generalized delta-rule.  Just use the
                            *   gradient and the learnrate.
                            */
        ConjugateGradients /**< Conjugate gradients */
      };

      /**
       * Training mode to be used.
       *
       * Default value: ConjugateGradients
       */
      eTrainingType trainingMode;

      /**
       * If true, an epoch (i.e. all the training data) will be presented
       * before a weight adaptation takes place.  Otherwise just one
       * training point is considered to adapt the weights.  For the
       * conjugate gradients method this mode is ignored (assumed true).
       *
       * Default value: true
       */
      bool batchMode;

      /**
       * Value for the momentum used in the steepest descent methods.
       * Should be between 0.0 and 1.0.
       *
       * Default value: 0.0 (no momentum)
       */
      double momentum;

      /**
       * Number of units in the hidden layers.
       *
       * The size of this vector indirectly determines the number of
       * layers of the network: it corresponds to the number of layers
       * minus one, which means that the total number of layers equals
       * the size of this vector plus one.
       *
       * Default value: [4] (i.e. a 2-layer network with 4 units in the
       * hidden layer)
       */
      ivector hiddenUnits;

      /**
       * Learning rate for the steepest descent method.
       */
      float learnrate;

      /**
       * Maximal number of epochs (number of presentations of the entire
       * training set)
       *
       * Default: 500
       */
      int maxNumberOfEpochs;

      /**
       * If this error value (or a lower one) is reached, the training is
       * stopped.  For the conjugate gradients method, the algorithm is
       * stopped if the magnitude of the gradient is smaller than this
       * value multiplied by the magnitude of the initial gradient.
       *
       * Default value: 0.005
       */
      double stopError;

      /**
       * Activation functors (per layer).  The objects pointed to by
       * these elements will be deleted when the parameters object is
       * deleted.
       *
       * Default value: sigmoids
       */
      std::vector<activationFunctor*> activationFunctions;
    };
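
    /*
     * A typical steepest-descent configuration could look as follows
     * (a sketch; all members are documented above):
     *
     *   lti::MLP::parameters param;
     *   param.trainingMode = lti::MLP::parameters::SteepestDescent;
     *   param.batchMode = true;     // adapt the weights once per epoch
     *   param.momentum = 0.9;       // momentum term in [0.0,1.0]
     *   param.learnrate = 0.1f;     // step width of the gradient descent
     *   param.maxNumberOfEpochs = 1000;
     *   param.stopError = 0.005;
     */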

    /**
     * default constructor
     */
    MLP();

    /**
     * copy constructor
     * @param other the object to be copied
     */
    MLP(const MLP& other);

    /**
     * destructor
     */
    virtual ~MLP();

    /**
     * returns the name of this type ("MLP")
     */
    virtual const char* getTypeName() const;

    /**
     * copy data of "other" classifier.
     * @param other the classifier to be copied
     * @return a reference to this classifier object
     */
    MLP& copy(const MLP& other);

    /**
     * alias for copy member
     * @param other the classifier to be copied
     * @return a reference to this classifier object
     */
    MLP& operator=(const MLP& other);

    /**
     * returns a pointer to a clone of this classifier.
     */
    virtual classifier* clone() const;

    /**
     * returns used parameters
     */
    const parameters& getParameters() const;

    /**
     * Supervised training.
     * The vectors in the <code>input</code> matrix
     * must be trained using as "known" classes the values given in
     * <code>ids</code>.
     * @param input the matrix with input vectors (each row is a training
     *        vector)
     * @param ids the output class ids for the input vectors
     * @return true if successful, false otherwise (if false you can check
     *         the error message with getStatusString())
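     *
     * A minimal error-handling sketch:
     * \code
     * if (!ann.train(inputs,ids)) {
     *   std::cerr << ann.getStatusString() << std::endl;
     * }
     * \endcode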
     */
    virtual bool train(const dmatrix& input,
                       const ivector& ids);

    /**
     * Supervised training.
     *
     * This method, used mainly for debugging purposes, initializes the
     * weights with the values given.
     *
     * The vectors in the \a input matrix must be trained using as "known"
     * classes the values given in \a ids.
     *
     * @param weights this vector is used to initialize the weights.  Must
     *        be consistent with the parameters.
     * @param input the matrix with input vectors (each row is a training
     *        vector)
     * @param ids the output class ids for the input vectors
     * @return true if successful, false otherwise (if false you can check
     *         the error message with getStatusString())
     */
    virtual bool train(const dvector& weights,
                       const dmatrix& input,
                       const ivector& ids);

    //TODO Check whether you really need a new classify method.
    // In some cases the superclasses method will suffice. Then just
    // delete the declaration and its implementation stump.

    /**
     * Classification.
     * Classifies the feature and returns the outputVector with
     * the classification result.
     * @param feature the %vector to be classified
     * @param result the result of the classification
     * @return false if an error occurred during classification, else true
     */
    virtual bool classify(const dvector& feature, outputVector& result) const;

    /**
     * write the MLP classifier in the given ioHandler
     * @param handler the ioHandler to be used
     * @param complete if true (the default) the enclosing begin/end will
     *        also be written, otherwise only the data block will be
     *        written.
     * @return true if write was successful
     */
    virtual bool write(ioHandler& handler,const bool complete=true) const;

    /**
     * read the MLP classifier from the given ioHandler
     * @param handler the ioHandler to be used
     * @param complete if true (the default) the enclosing begin/end will
     *        also be read, otherwise only the data block will be read.
     * @return true if read was successful
     */
    virtual bool read(ioHandler& handler,const bool complete=true);

    /*
     * Undocumented function for debug purposes only.  It displays the
     * internal weight matrices.
     */
    bool prettyPrint();

    /**
     * Return a reference to the internal weights vector.
     * Used mainly for debugging purposes.
     */
    const dvector& getWeights() const;

    //TODO: comment the attributes of your classifier
    // If you add more attributes manually, do not forget to do following:
    // 1. indicate in the default constructor the default values
    // 2. make sure that the copy member also copies your new attributes,
    //    or ensure there that these attributes are properly initialized.

  private:

    // used variables

    /**
     * vector for saving the weights of the network
     */
    vector<double> weights;

    /**
     * indices for the beginning of each layer in the weights vector
     */
    ivector layerIndex;

    /**
     * weights as matrices, to simplify propagation.
     * The matrices share their memory with weights.
     *
     * All matrices are initialized in initWeights()
     */
    std::vector< dmatrix > matWeights;
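
    /*
     * Layout note (inferred from biasMultiply() below, where the bias
     * enters as an extra input fixed to 1.0): the weight matrix of a
     * layer with n units and p inputs presumably has n rows and p+1
     * columns, the first column holding the bias weights.  For a network
     * with 2 inputs, 4 hidden units and 3 outputs this would yield a 4x3
     * and a 3x5 matrix stored consecutively in the weights vector, so
     * that layerIndex would be [0 12].
     */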

    /**
     * number of input units (determined by the size of the training data)
     */
    int inputs;

    /**
     * number of outputs (determined by training)
     */
    int outputs;

    /**
     * check how many outputs are required for the given
     * (external) ids, and set the outputs attribute.
     */
    void checkHowManyOutputs(const ivector& ids);

    /**
     * unitsOut saves the output of each unit;
     * each layer is one element of the vector,
     * the first layer has index 0.
     */
    mutable std::vector< dvector > unitsOut;

    /**
     * net value at each layer
     */
    mutable std::vector< dvector > unitsNet;

    /**
     * Value for "off" at the output layer (it depends on the activation
     * functor of the output layer).
     */
    double off;

    /**
     * Value for "on" at the output layer (it depends on the activation
     * functor of the output layer).
     */
    double on;

    /**
     * initialize weights with random values.
     * This method assumes that the attributes inputs and outputs are
     * correct.
     *
     * @param keepWeightVals if true, the contents of weights won't be
     *                       changed (if they are incompatible with the
     *                       actual parameters, false will be returned).
     *                       If false, the weights will be initialized
     *                       with random values between "from" and "to".
     * @param from lower bound for the random values
     * @param to upper bound for the random values
     * @return true if successful, false otherwise.
     */
    bool initWeights(const bool keepWeightVals = false,
                     const double& from = -1.0,
                     const double& to = +1.0);

    /**
     * Given the weights vector, update the vector of matrices so that
     * each matrix uses its respective memory block in the vector.
     */
    void updateWeightIndices(dvector& theWeights,
                             std::vector<dmatrix>& theWMats) const;

    /**
     * accumulated error for one epoch
     */
    double totalError;

    /**
     * Error norm
     */
    double errorNorm;

    /**
     * calculate the outputs of all network units.  The result will
     * be left in the unitsOut attribute.
     *
     * @param input input vector
     */
    bool propagate(const dvector& input) const;

    /**
     * calculate the outputs of all network units.
     *
     * @param input input vector
     * @param mWeights weights in matrix form
     * @param uNet preliminary results for each unit with the "net" values
     * @param uOut output of all units (f(net))
     * @return true if successful, false otherwise.
     */
    bool propagate(const dvector& input,
                   const std::vector<dmatrix>& mWeights,
                   std::vector<dvector>& uNet,
                   std::vector<dvector>& uOut) const;

    /**
     * compute the error using the last propagated input and the given
     * pattern
     */
    bool computeActualError(const int id,double& error) const;

    /**
     * compute the error using the given output units vector
     */
    bool computePatternError(const int id,
                             const dvector& outUnits,
                             double& error) const;

    /**
     * compute the error of the given weights for the whole training set.
     */
    bool computeTotalError(const std::vector<dmatrix>& mWeights,
                           const dmatrix& inputs,
                           const ivector& ids,
                           double& totalError) const;

    /**
     * compute mat*vct' where vct' is a vector with one additional element
     * (1.0) at the beginning of vct.
     */
    bool biasMultiply(const dmatrix& mat,
                      const dvector& vct,
                      dvector& res) const;
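
    /*
     * In other words, biasMultiply() computes the affine map of one
     * layer: with vct' = (1, vct(0), ..., vct(n-1))^T,
     *
     *   res(i) = mat(i,0) + sum_j mat(i,j+1)*vct(j),
     *
     * so the first column of mat acts as the bias of each unit.
     */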

    /**
     * calculate the negative gradient of the error surface using the
     * back-propagation algorithm
     *
     * @param input input vector
     * @param outputId desired output.  This value must be between 0 and
     *                 the number of output elements minus 1.
     * @param grad computed gradient of the error surface
     * @return true if successful, or false otherwise.
     */
    bool calcGradient(const dvector& input,
                      const int outputId,
                      dvector& grad);

    /**
     * calculate the negative gradient of the error surface using the
     * back-propagation algorithm for all patterns in an epoch.
     *
     * @param inputs the matrix with input vectors (one per row)
     * @param ids desired output ids for the input vectors
     * @param grad computed gradient of the error surface
     * @return true if successful, or false otherwise.
     */
    bool calcGradient(const dmatrix& inputs,
                      const ivector& ids,
                      dvector& grad);

    /**
     * train the network with the steepest descent method (batch mode).
     * Weights must be initialized previously.
     */
    bool trainSteepestBatch(const dmatrix& inputs,
                            const ivector& internalIds);

    /**
     * train the network with the steepest descent method (sequential
     * mode).  Weights must be initialized previously.
     */
    bool trainSteepestSequential(const dmatrix& inputs,
                                 const ivector& internalIds);

    /**
     * train the network with the conjugate gradients method.
     * Weights must be initialized previously.
     */
    bool trainConjugateGradients(const dmatrix& inputs,
                                 const ivector& internalIds);

    /**
     * compute the error norm, with which all displayed error values are
     * normalized.  This allows an easy comparison of different
     * classifiers.
     */
    bool computeErrorNorm(const ivector& internIds);

    /**
     * line search: computes the scalar factor eta at which the error is
     * minimized.  It begins at the current weights and follows the given
     * direction.
     */
    bool lineSearch(const dmatrix& inputs,
                    const ivector& ids,
                    const dvector& direction,
                    double& eta,
                    dvector& newWeights) const;
  };


  /**
   * write the functor::parameters in the given ioHandler.
   * The complete flag indicates whether the enclosing begin and end
   * should also be written or not.
   */
  bool write(ioHandler& handler,const MLP::activationFunctor& p,
             const bool complete = true);

  /**
   * read the functor::parameters from the given ioHandler.
   * The complete flag indicates whether the enclosing begin and end
   * should also be read or not.
   */
  bool read(ioHandler& handler,MLP::activationFunctor& p,
            const bool complete = true);

}

#endif