/*
 * Copyright (C) 2002, 2003, 2004, 2005, 2006
 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
 *
 * This file is part of the LTI-Computer Vision Library (LTI-Lib)
 *
 * The LTI-Lib is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License (LGPL)
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * The LTI-Lib is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the LTI-Lib; see the file LICENSE.  If
 * not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */


/*--------------------------------------------------------------------
 * project ....: LTI-Lib: Image Processing and Computer Vision Library
 * file .......: ltiMLP.h
 * authors ....: Bastian Ibach, Pablo Alvarado
 * organization: LTI, RWTH Aachen
 * creation ...: 13.8.2002
 * revisions ..: $Id: ltiMLP.h,v 1.7 2007/01/10 02:25:44 alvarado Exp $
 */

#ifndef _LTI_M_L_P_H_
#define _LTI_M_L_P_H_

#include "ltiSupervisedInstanceClassifier.h"
#include "ltiVector.h"
#include "ltiObjectFactory.h"
#include <vector>

namespace lti {

  /**
   * Multi-layer perceptrons
   *
   * This class implements multi-layer neural networks using different
   * training methods.
   *
   * A number of layers between 1 and 3 is allowed.
   *
   * Training methods implemented at this time are:
   * - generalized delta-rule (steepest descent) with and without momentum,
   * - conjugate gradients.
   *
   * The following example shows how to use this classifier:
   *
   * \code
   * double inData[] = {-1,-1,
   *                    -1, 0,
   *                    -1,+1,
   *                    +0,+1,
   *                    +1,+1,
   *                    +1,+0,
   *                    +1,-1,
   *                    +0,-1,
   *                    +0,+0};
   * lti::dmatrix inputs(9,2,inData);     // training vectors
   *
   * int idsData[] = {1,0,1,0,1,0,1,0,1}; // and the respective ids
   * lti::ivector ids(9,idsData);
   *
   * lti::MLP ann;  // our artificial neural network
   *
   * lti::MLP::parameters param;
   * lti::MLP::sigmoidFunctor sigmoid(1);
   * param.setLayers(4,sigmoid);  // two layers with four hidden units
   * param.trainingMode = lti::MLP::parameters::ConjugateGradients;
   * param.maxNumberOfEpochs=200;
   * ann.setParameters(param);
   *
   * // we want to see some info while training
   * streamProgressInfo prog(std::cout);
   * ann.setProgressObject(prog);
   *
   * // train the network
   * ann.train(inputs,ids);
   *
   * // let us save our network for future use
   * // in the file called mlp.dat
   * std::ofstream out("mlp.dat");
   * lti::lispStreamHandler lsh(out);
   *
   * // save the network
   * ann.write(lsh);
   * // close the file
   * out.close();
   *
   * // show some results with the same training set:
   *
   * lti::MLP::outputVector outv; // here we will get some
   *                              // classification results
   * cout << endl << "Results: " << endl;
   *
   * int i,id;
   * for (i=0;i<inputs.rows();++i) {
   *   ann.classify(inputs.getRow(i),outv);
   *   cout << "Input " << inputs.getRow(i) << " \tOutput: ";
   *   outv.getId(outv.getWinnerUnit(),id);
   *   cout << id;
   *   if (id != ids.at(i)) {
   *     cout << " <- should be " << ids.at(i);
   *   }
   *   cout << endl;
   * }
   * \endcode
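   *
   * A saved network can later be restored from the same file.  A minimal
   * sketch (assuming mlp.dat was written as above):
   *
   * \code
   * std::ifstream in("mlp.dat");
   * lti::lispStreamHandler lsh(in);
   *
   * lti::MLP ann;
   * ann.read(lsh);  // restore parameters and trained weights
   * in.close();
   * \endcode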
   *
   * Better display for the classification of 2D problems can be
   * generated using the functor lti::classifier2DVisualizer.
   */
  class MLP : public supervisedInstanceClassifier {
  public:

    // ----------------------------------------------
    // activation functor
    // ----------------------------------------------

    /**
     * Parent class for all activation function functors
     */
    class activationFunctor : public object {
    public:
      /**
       * The functor operator.  Operates in place, applying the function
       * to each element of the vector.
       */
      virtual bool apply(dvector& output) const = 0;

      /**
       * The functor operator.  Operates on a copy.
       */
      virtual bool apply(const dvector& src, dvector& output) const = 0;

      /**
       * The derivative of the functor
       */
      virtual bool deriv(dvector& output) const = 0;

      /**
       * Return a copy of this functor
       */
      virtual activationFunctor* clone() const = 0;

      /**
       * Return the value used to represent "true" or "on"
       */
      virtual const double& onValue() const = 0;

      /**
       * Return the value used to represent "false" or "off"
       */
      virtual const double& offValue() const = 0;

      /**
       * write the parameters in the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be written, otherwise only the data block will be
       *        written.
       * @return true if write was successful
       */
      virtual bool write(ioHandler& handler,const bool complete=true) const {
        bool b = true;
        if (complete) {
          b = b && handler.writeBegin();
          b = b && handler.writeEnd();
        }
        return b;
      }

      /**
       * read the parameters from the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be read, otherwise only the data block will be read.
       * @return true if read was successful
       */
      virtual bool read(ioHandler& handler,const bool complete=true) {
        bool b = true;
        if (complete) {
          b = b && handler.readBegin();
          b = b && handler.readEnd();
        }
        return b;
      }
    };
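
    /*
     * A new activation function can be plugged in by deriving from
     * activationFunctor.  The following sketch of a hypothetical
     * hyperbolic-tangent functor (tanhFunctor is NOT part of the
     * library) illustrates the interface.  Note that deriv() receives
     * the net values and must leave the derivative in the same vector:
     *
     *   class tanhFunctor : public lti::MLP::activationFunctor {
     *   public:
     *     // in-place application of tanh to each element
     *     virtual bool apply(dvector& output) const {
     *       dvector::iterator it,eit;
     *       for (it=output.begin(),eit=output.end();it!=eit;++it) {
     *         (*it) = tanh(*it);
     *       }
     *       return true;
     *     };
     *     // on-copy application, reusing the in-place version
     *     virtual bool apply(const dvector& src, dvector& output) const {
     *       output.copy(src);
     *       return apply(output);
     *     };
     *     // derivative: d/dx tanh(x) = 1 - tanh^2(x)
     *     virtual bool deriv(dvector& output) const {
     *       dvector::iterator it,eit;
     *       for (it=output.begin(),eit=output.end();it!=eit;++it) {
     *         const double t = tanh(*it);
     *         (*it) = 1.0 - t*t;
     *       }
     *       return true;
     *     };
     *     virtual activationFunctor* clone() const {return new tanhFunctor;};
     *     virtual const double& onValue() const {
     *       static const double theOnValue = +1.0;
     *       return theOnValue;
     *     };
     *     virtual const double& offValue() const {
     *       static const double theOffValue = -1.0;
     *       return theOffValue;
     *     };
     *   };
     */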

    /**
     * A linear activation function
     */
    class linearActFunctor : public activationFunctor {
    public:
      /**
       * The functor operator
       */
      virtual bool apply(dvector& output) const {return true;};

      /**
       * The functor operator.  Operates on a copy.
       */
      virtual bool apply(const dvector& src, dvector& output) const {
        output.copy(src);
        return true;
      };

      /**
       * The derivative of the functor
       */
      virtual bool deriv(dvector& output) const {
        output.fill(1.0);
        return true;
      };

      /**
       * Return the value used to represent "true" or "on"
       */
      virtual const double& onValue() const {
        static const double theOnValue = +1.0;
        return theOnValue;
      };

      /**
       * Return the value used to represent "false" or "off"
       */
      virtual const double& offValue() const {
        static const double theOffValue = -1.0;
        return theOffValue;
      };

      /**
       * Return a copy of this functor
       */
      virtual activationFunctor* clone() const {return new linearActFunctor;};
    };

    /**
     * A sign activation function (1.0 if input 0 or positive, -1.0
     * otherwise)
     */
    class signFunctor : public activationFunctor {
    public:
      /**
       * The functor operator
       */
      virtual bool apply(dvector& output) const {
        dvector::iterator it,eit;
        for (it=output.begin(),eit=output.end();it!=eit;++it) {
          (*it) = (*it) >= 0 ? 1.0 : -1.0;
        }
        return true;
      };

      /**
       * The functor operator.  Operates on a copy.
       */
      virtual bool apply(const dvector& src, dvector& output) const {
        dvector::const_iterator it,eit;
        dvector::iterator oit;
        output.resize(src.size(),0.0,false,false);
        for (it=src.begin(),eit=src.end(),oit=output.begin();
             it!=eit;
             ++it,++oit) {
          (*oit) = (*it) >= 0 ? 1.0 : -1.0;
        }
        return true;
      };

      /**
       * The derivative of the functor (will return 1.0 to allow learning)
       */
      virtual bool deriv(dvector& output) const {
        output.fill(1.0);
        return true;
      };

      /**
       * Return a copy of this functor
       */
      virtual activationFunctor* clone() const {return new signFunctor;};

      /**
       * Return the value used to represent "true" or "on"
       */
      virtual const double& onValue() const {
        static const double theOnValue = +1.0;
        return theOnValue;
      };

      /**
       * Return the value used to represent "false" or "off"
       */
      virtual const double& offValue() const {
        static const double theOffValue = -1.0;
        return theOffValue;
      };
    };

    /**
     * A sigmoid activation function
     */
    class sigmoidFunctor : public activationFunctor {
    protected:
      double slope;
    public:
      /**
       * Constructor using an explicit sigmoid slope
       */
      sigmoidFunctor(const double& theSlope):
        slope(theSlope) {};

      /**
       * Constructor of a sigmoid with slope 1.0
       */
      sigmoidFunctor() : slope(1.0) {};

      /**
       * The functor operator
       */
      virtual bool apply(dvector& output) const {
        dvector::iterator it,eit;
        for (it=output.begin(),eit=output.end();it!=eit;++it) {
          (*it) = 1.0/(1.0+exp(-(*it)*slope));
        }
        return true;
      };

      /**
       * The functor operator.  Operates on a copy.
       */
      virtual bool apply(const dvector& src, dvector& output) const {
        dvector::const_iterator it,eit;
        dvector::iterator oit;
        output.resize(src.size(),0.0,false,false);
        for (it=src.begin(),eit=src.end(),oit=output.begin();
             it!=eit;
             ++it,++oit) {
          (*oit) = 1.0/(1.0+exp(-(*it)*slope));
        }
        return true;
      };

      /**
       * The derivative of the functor
       */
      virtual bool deriv(dvector& output) const {
        dvector::iterator it,eit;
        for (it=output.begin(),eit=output.end();it!=eit;++it) {
          (*it) = 1.0/(1.0+exp(-(*it)*slope));
          (*it) = (*it)*(1.0-(*it))*slope;
        }
        return true;
      };

      /**
       * Return a copy of this functor
       */
      virtual activationFunctor* clone() const {
        return new sigmoidFunctor(slope);
      };

      /**
       * Return the value used to represent "true" or "on"
       */
      virtual const double& onValue() const {
        static const double theOnValue = +1.0;
        return theOnValue;
      };

      /**
       * Return the value used to represent "false" or "off"
       */
      virtual const double& offValue() const {
        static const double theOffValue = 0.0;
        return theOffValue;
      };

      /**
       * write the parameters in the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be written, otherwise only the data block will be
       *        written.
       * @return true if write was successful
       */
      virtual bool write(ioHandler& handler,const bool complete=true) const {
        bool b = true;
        if (complete) {
          b = b && handler.writeBegin();
        }
        b = b && lti::write(handler,"slope",slope);
        if (complete) {
          b = b && handler.writeEnd();
        }
        return b;
      }

      /**
       * read the parameters from the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be read, otherwise only the data block will be read.
       * @return true if read was successful
       */
      virtual bool read(ioHandler& handler,const bool complete=true) {
        bool b = true;
        if (complete) {
          b = b && handler.readBegin();
        }
        b = b && lti::read(handler,"slope",slope);
        if (complete) {
          b = b && handler.readEnd();
        }
        return b;
      }
    };
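
    /*
     * Note on sigmoidFunctor::deriv(): for the sigmoid
     *
     *   f(x) = 1 / (1 + exp(-slope*x))
     *
     * the derivative can be expressed in terms of the function value
     * itself,
     *
     *   f'(x) = slope * f(x) * (1 - f(x)),
     *
     * which is exactly what deriv() computes above: it first evaluates f
     * on the given net values and then applies this identity.
     */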

    /**
     * The parameters for the class MLP
     */
    class parameters : public supervisedInstanceClassifier::parameters {
    private:
      /**
       * Activation functors
       */
      static const activationFunctor *const activationFunctors[];

      /**
       * Factory to create activation functors
       */
      static objectFactory<activationFunctor> objFactory;

    public:
      /**
       * default constructor
       */
      parameters();

      /**
       * copy constructor
       * @param other the parameters object to be copied
       */
      parameters(const parameters& other);

      /**
       * destructor
       */
      virtual ~parameters();

      /**
       * returns the name of this type
       */
      const char* getTypeName() const;

      /**
       * copy the contents of a parameters object
       * @param other the parameters object to be copied
       * @return a reference to this parameters object
       */
      parameters& copy(const parameters& other);

      /**
       * copy the contents of a parameters object
       * @param other the parameters object to be copied
       * @return a reference to this parameters object
       */
      parameters& operator=(const parameters& other);

      /**
       * returns a pointer to a clone of the parameters
       */
      virtual classifier::parameters* clone() const;

      /**
       * write the parameters in the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be written, otherwise only the data block will be
       *        written.
       * @return true if write was successful
       */
      virtual bool write(ioHandler& handler,const bool complete=true) const;

      /**
       * read the parameters from the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end will
       *        also be read, otherwise only the data block will be read.
       * @return true if read was successful
       */
      virtual bool read(ioHandler& handler,const bool complete=true);

#     ifdef _LTI_MSC_6
      /**
       * this function is required by MSVC only, as a workaround for a
       * very awful bug that has existed since MSVC V.4.0 and still
       * remains in V.6.0 with all its bugfixes (the so-called "service
       * packs").  This method is also public due to another bug, so
       * please NEVER EVER call this method directly: use read() instead.
       */
      bool readMS(ioHandler& handler,const bool complete=true);

      /**
       * this function is required by MSVC only, as a workaround for a
       * very awful bug that has existed since MSVC V.4.0 and still
       * remains in V.6.0 with all its bugfixes (the so-called "service
       * packs").  This method is also public due to another bug, so
       * please NEVER EVER call this method directly: use write() instead.
       */
      bool writeMS(ioHandler& handler,const bool complete=true) const;
#     endif

      /**
       * Initialize the parameters to create an MLP with two layers
       * with the given number of hidden units (the number of
       * input and output units is determined in the training stage).
       *
       * @param hidden number of hidden units
       * @param activ activation function to be used in all units
       */
      bool setLayers(const int hidden,
                     const activationFunctor& activ);

      /**
       * Initialize the parameters to create an MLP with one single layer
       * (the number of input and output units is determined in the
       * training stage).
       *
       * @param activ activation function to be used in all units
       */
      bool setLayers(const activationFunctor& activ);

      /**
       * Set the activation functor for a given layer.  The current number
       * of layers is determined by the size of the \a hiddenUnits
       * attribute.
       *
       * @param layer number of the layer
       * @param aFct activationFunctor
       * @return true if successful, or false otherwise (usually because
       *         the layer number is invalid).
       */
      bool setLayerActivation(const int layer, const activationFunctor& aFct);
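
      /*
       * The layer structure is configured most easily via setLayers()
       * and setLayerActivation().  A sketch for a two-layer network with
       * ten hidden units, a sigmoid hidden layer and a linear output
       * layer (layer indices are assumed to start at 0):
       *
       *   lti::MLP::parameters param;
       *   lti::MLP::sigmoidFunctor sigmoid;
       *   lti::MLP::linearActFunctor linear;
       *   param.setLayers(10,sigmoid);        // two layers, 10 hidden units
       *   param.setLayerActivation(1,linear); // linear output layer
       */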

      // ------------------------------------------------
      // the parameters
      // ------------------------------------------------

      /**
       * Training type
       */
      enum eTrainingType {
        SteepestDescent,   /**< Generalized delta-rule.  Just use the
                            *   gradient and the learnrate.
                            */
        ConjugateGradients /**< Conjugate gradients */
      };

      /**
       * Training mode to be used.
       *
       * Default value: ConjugateGradients
       */
      eTrainingType trainingMode;

      /**
       * If true, an epoch (i.e. all the training data) will be presented
       * before a weight adaptation takes place.  Otherwise just one
       * training point is considered to adapt the weights.  For the
       * conjugate gradients method this mode is ignored (assumed true).
       *
       * Default value: true
       */
      bool batchMode;

      /**
       * Value for the momentum used in the steepest descent methods.
       * Should be between 0.0 and 1.0.
       *
       * Default value: 0.0 (no momentum)
       */
      double momentum;

      /**
       * Number of units in the hidden layers.
       *
       * The size of this vector indirectly determines the number of
       * layers of the network: it corresponds to the number of layers
       * minus one, which means that the total number of layers equals
       * the size of this vector plus one.
       *
       * Default value: [4] (i.e. a 2-layer network with 4 units in the
       * hidden layer)
       */
      ivector hiddenUnits;

      /**
       * Learning rate for the steepest descent method.
       */
      float learnrate;

      /**
       * Maximal number of epochs (number of presentations of the entire
       * training set)
       *
       * Default: 500
       */
      int maxNumberOfEpochs;

      /**
       * If this error value (or a lower one) is reached, the training is
       * stopped.  For the conjugate gradients method, the algorithm is
       * stopped if the magnitude of the gradient is smaller than this
       * value multiplied by the magnitude of the initial gradient.
       *
       * Default value: 0.005
       */
      double stopError;

      /**
       * Activation functors (per layer).  The objects pointed to by
       * these elements will be deleted when the parameters object is
       * deleted.
       *
       * Default value: sigmoids
       */
      std::vector<activationFunctor*> activationFunctions;
    };
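
    /*
     * A typical steepest-descent configuration could look as follows
     * (a sketch; all members are documented above):
     *
     *   lti::MLP::parameters param;
     *   param.trainingMode = lti::MLP::parameters::SteepestDescent;
     *   param.batchMode = true;     // adapt the weights once per epoch
     *   param.momentum = 0.9;       // momentum term in [0.0,1.0]
     *   param.learnrate = 0.1f;     // step width of the gradient descent
     *   param.maxNumberOfEpochs = 1000;
     *   param.stopError = 0.005;
     */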

    /**
     * default constructor
     */
    MLP();

    /**
     * copy constructor
     * @param other the object to be copied
     */
    MLP(const MLP& other);

    /**
     * destructor
     */
    virtual ~MLP();

    /**
     * returns the name of this type ("MLP")
     */
    virtual const char* getTypeName() const;

    /**
     * copy data of "other" classifier.
     * @param other the classifier to be copied
     * @return a reference to this classifier object
     */
    MLP& copy(const MLP& other);

    /**
     * alias for copy member
     * @param other the classifier to be copied
     * @return a reference to this classifier object
     */
    MLP& operator=(const MLP& other);

    /**
     * returns a pointer to a clone of this classifier.
     */
    virtual classifier* clone() const;

    /**
     * returns used parameters
     */
    const parameters& getParameters() const;

    /**
     * Supervised training.
     * The vectors in the <code>input</code> matrix
     * must be trained using as "known" classes the values given in
     * <code>ids</code>.
     * @param input the matrix with input vectors (each row is a training
     *        vector)
     * @param ids the output class ids for the input vectors
     * @return true if successful, false otherwise (if false you can check
     *         the error message with getStatusString())
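     *
     * A minimal error-handling sketch:
     * \code
     * if (!ann.train(inputs,ids)) {
     *   std::cerr << ann.getStatusString() << std::endl;
     * }
     * \endcode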
     */
    virtual bool train(const dmatrix& input,
                       const ivector& ids);

    /**
     * Supervised training.
     *
     * This method, used mainly for debugging purposes, initializes the
     * weights with the values given.
     *
     * The vectors in the \a input matrix must be trained using as "known"
     * classes the values given in \a ids.
     *
     * @param weights this vector is used to initialize the weights.  Must
     *        be consistent with the parameters.
     * @param input the matrix with input vectors (each row is a training
     *        vector)
     * @param ids the output class ids for the input vectors
     * @return true if successful, false otherwise (if false you can check
     *         the error message with getStatusString())
     */
    virtual bool train(const dvector& weights,
                       const dmatrix& input,
                       const ivector& ids);

    //TODO Check whether you really need a new classify method.
    // In some cases the superclasses method will suffice. Then just
    // delete the declaration and its implementation stump.

    /**
     * Classification.
     * Classifies the feature and returns the outputVector with
     * the classification result.
     * @param feature the %vector to be classified
     * @param result the result of the classification
     * @return false if an error occurred during classification, else true
     */
    virtual bool classify(const dvector& feature, outputVector& result) const;

    /**
     * write the MLP classifier in the given ioHandler
     * @param handler the ioHandler to be used
     * @param complete if true (the default) the enclosing begin/end will
     *        also be written, otherwise only the data block will be
     *        written.
     * @return true if write was successful
     */
    virtual bool write(ioHandler& handler,const bool complete=true) const;

    /**
     * read the MLP classifier from the given ioHandler
     * @param handler the ioHandler to be used
     * @param complete if true (the default) the enclosing begin/end will
     *        also be read, otherwise only the data block will be read.
     * @return true if read was successful
     */
    virtual bool read(ioHandler& handler,const bool complete=true);

    /*
     * Undocumented function for debug purposes only.  It displays the
     * internal weight matrices.
     */
    bool prettyPrint();

    /**
     * Return a reference to the internal weights vector.
     * Used mainly for debugging purposes.
     */
    const dvector& getWeights() const;

    //TODO: comment the attributes of your classifier
    // If you add more attributes manually, do not forget to do following:
    // 1. indicate in the default constructor the default values
    // 2. make sure that the copy member also copies your new attributes,
    //    or ensure there that these attributes are properly initialized.

  private:

    // used variables

    /**
     * vector for saving the weights of the network
     */
    vector<double> weights;

    /**
     * indices for the beginning of each layer in the weights vector
     */
    ivector layerIndex;

    /**
     * weights as matrices, to simplify propagation.
     * The matrices share their memory with weights.
     *
     * All matrices are initialized in initWeights()
     */
    std::vector< dmatrix > matWeights;
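
    /*
     * Layout note (inferred from biasMultiply() below, where the bias
     * enters as an extra input fixed to 1.0): the weight matrix of a
     * layer with n units and p inputs presumably has n rows and p+1
     * columns, the first column holding the bias weights.  For a network
     * with 2 inputs, 4 hidden units and 3 outputs this would yield a 4x3
     * and a 3x5 matrix stored consecutively in the weights vector, so
     * that layerIndex would be [0 12].
     */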

    /**
     * number of input units (determined by the size of the training data)
     */
    int inputs;

    /**
     * number of outputs (determined by training)
     */
    int outputs;

    /**
     * check how many outputs are required for the given
     * (external) ids, and set the outputs attribute.
     */
    void checkHowManyOutputs(const ivector& ids);

    /**
     * unitsOut saves the output of each unit;
     * each layer is one element of the vector,
     * the first layer has index 0.
     */
    mutable std::vector< dvector > unitsOut;

    /**
     * net value at each layer
     */
    mutable std::vector< dvector > unitsNet;

    /**
     * Value for "off" at the output layer (it depends on the activation
     * functor of the output layer).
     */
    double off;

    /**
     * Value for "on" at the output layer (it depends on the activation
     * functor of the output layer).
     */
    double on;

    /**
     * initialize weights with random values.
     * This method assumes that the attributes inputs and outputs are
     * correct.
     *
     * @param keepWeightVals if true, the contents of weights won't be
     *                       changed (if they are incompatible with the
     *                       actual parameters, false will be returned).
     *                       If false, the weights will be initialized
     *                       with random values between "from" and "to".
     * @param from lower bound for the random values
     * @param to upper bound for the random values
     * @return true if successful, false otherwise.
     */
    bool initWeights(const bool keepWeightVals = false,
                     const double& from = -1.0,
                     const double& to = +1.0);

    /**
     * Given the weights vector, update the vector of matrices so that
     * each matrix uses its respective memory block in the vector.
     */
    void updateWeightIndices(dvector& theWeights,
                             std::vector<dmatrix>& theWMats) const;

    /**
     * accumulated error for one epoch
     */
    double totalError;

    /**
     * Error norm
     */
    double errorNorm;

    /**
     * calculate the outputs of all network units.  The result will
     * be left in the unitsOut attribute.
     *
     * @param input input vector
     */
    bool propagate(const dvector& input) const;

    /**
     * calculate the outputs of all network units.
     *
     * @param input input vector
     * @param mWeights weights in matrix form
     * @param uNet preliminary results for each unit with the "net" values
     * @param uOut output of all units (f(net))
     * @return true if successful, false otherwise.
     */
    bool propagate(const dvector& input,
                   const std::vector<dmatrix>& mWeights,
                   std::vector<dvector>& uNet,
                   std::vector<dvector>& uOut) const;

    /**
     * compute the error using the last propagated input and the given
     * pattern
     */
    bool computeActualError(const int id,double& error) const;

    /**
     * compute the error using the given output units vector
     */
    bool computePatternError(const int id,
                             const dvector& outUnits,
                             double& error) const;

    /**
     * compute the error of the given weights for the whole training set.
     */
    bool computeTotalError(const std::vector<dmatrix>& mWeights,
                           const dmatrix& inputs,
                           const ivector& ids,
                           double& totalError) const;

    /**
     * compute mat*vct' where vct' is a vector with one additional element
     * (1.0) at the beginning of vct.
     */
    bool biasMultiply(const dmatrix& mat,
                      const dvector& vct,
                      dvector& res) const;
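
    /*
     * In other words, biasMultiply() computes the affine map of one
     * layer: with vct' = (1, vct(0), ..., vct(n-1))^T,
     *
     *   res(i) = mat(i,0) + sum_j mat(i,j+1)*vct(j),
     *
     * so the first column of mat acts as the bias of each unit.
     */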

    /**
     * calculate the negative gradient of the error surface using the
     * back-propagation algorithm
     *
     * @param input input vector
     * @param outputId desired output.  This value must be between 0 and
     *                 the number of output elements minus 1.
     * @param grad computed gradient of the error surface
     * @return true if successful, or false otherwise.
     */
    bool calcGradient(const dvector& input,
                      const int outputId,
                      dvector& grad);

    /**
     * calculate the negative gradient of the error surface using the
     * back-propagation algorithm for all patterns in an epoch.
     *
     * @param inputs the matrix with input vectors (one per row)
     * @param ids desired output ids for the input vectors
     * @param grad computed gradient of the error surface
     * @return true if successful, or false otherwise.
     */
    bool calcGradient(const dmatrix& inputs,
                      const ivector& ids,
                      dvector& grad);

    /**
     * train the network with the steepest descent method (batch mode).
     * Weights must be initialized previously.
     */
    bool trainSteepestBatch(const dmatrix& inputs,
                            const ivector& internalIds);

    /**
     * train the network with the steepest descent method (sequential
     * mode).  Weights must be initialized previously.
     */
    bool trainSteepestSequential(const dmatrix& inputs,
                                 const ivector& internalIds);

    /**
     * train the network with the conjugate gradients method.
     * Weights must be initialized previously.
     */
    bool trainConjugateGradients(const dmatrix& inputs,
                                 const ivector& internalIds);

    /**
     * compute the error norm, with which all displayed error values are
     * normalized.  This allows an easy comparison of different
     * classifiers.
     */
    bool computeErrorNorm(const ivector& internIds);

    /**
     * line search: computes the scalar factor eta at which the error is
     * minimized.  It begins at the current weights and follows the given
     * direction.
     */
    bool lineSearch(const dmatrix& inputs,
                    const ivector& ids,
                    const dvector& direction,
                    double& eta,
                    dvector& newWeights) const;
  };


  /**
   * write the functor::parameters in the given ioHandler.
   * The complete flag indicates whether the enclosing begin and end
   * should also be written or not.
   */
  bool write(ioHandler& handler,const MLP::activationFunctor& p,
             const bool complete = true);

  /**
   * read the functor::parameters from the given ioHandler.
   * The complete flag indicates whether the enclosing begin and end
   * should also be read or not.
   */
  bool read(ioHandler& handler,MLP::activationFunctor& p,
            const bool complete = true);

}

#endif