/*
 * Copyright (C) 2003, 2004, 2005, 2006
 * Lehrstuhl fuer Technische Informatik, RWTH-Aachen, Germany
 *
 * This file is part of the LTI-Computer Vision Library (LTI-Lib)
 *
 * The LTI-Lib is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License (LGPL)
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * The LTI-Lib is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the LTI-Lib; see the file LICENSE.  If
 * not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA.
 */

/*----------------------------------------------------------------
 * project ....: LTI Digital Image/Signal Processing Library
 * file .......: ltiKNNClassifier.h
 * authors ....: Frederik Lange
 * organization: LTI, RWTH Aachen
 * creation ...: 15.06.2003
 * revisions ..: $Id: ltiKNNClassifier.h,v 1.15 2006/02/07 18:18:59 ltilib Exp $
 */

#ifndef _LTI_KNN_CLASSIFIER_H_
#define _LTI_KNN_CLASSIFIER_H_

#include "ltiSupervisedInstanceClassifier.h"
#include "ltiKdTree.h"

#include <map>
#include <vector>

namespace lti {
  /**
   * Implements a classifier based on a k nearest neighbors search.
   *
   * The simplest case of a k-nearest-neighbor classifier is k=1, also
   * known as a nearest neighbor classifier, which assigns as winner class
   * for a test point \a x the class of the nearest sample point.
   *
   * For k>1, a k-nearest-neighbor classifier assigns to a point \a x the
   * class most represented among its k nearest neighbors.  In the
   * simplest case, each of the k nearest sample points votes with the
   * same weight for its respective class.  In more sophisticated cases,
   * each point votes with a weight that depends on the total number of
   * sample points of its class and/or on the ratio between the distance
   * from the test point to the winner sample and the distance from the
   * test point to the first sample point belonging to another class.
   *
   * At this point, only the Euclidean distance is supported.
   *
   * This classifier uses a kd-tree to perform the search efficiently,
   * but therefore also shows the drawbacks of a normal kd-tree: it is
   * not suitable for higher dimensional spaces.  If you work with
   * high-dimensional spaces, you should try increasing the bucket size,
   * or activating the best-bin-first mode, suggested by David Lowe to
   * obtain an approximate (but usually accurate enough) solution in much
   * less time.
   *
   * This classifier differs a little from the other LTI-Lib classifiers.
   * Since the whole training set is stored as sample points, it is
   * useful in many applications to obtain, besides the winner class, the
   * exact winner samples.  Therefore, this class administrates two sets
   * of ID numbers: one set for the class IDs, which is used the same way
   * as in all other LTI-Lib classifiers, and a second set that
   * administrates IDs for each sample point.  This second set can be
   * given explicitly or generated automatically.  You can then, for
   * example, use tables containing additional information for each
   * winner point, accessed via the point ID.
   *
   * Example:
   *
   * \code
   *
   * #include <iostream>
   * #include <fstream>
   *
   * #include "ltiKNNClassifier.h"
   * #include "ltiLispStreamHandler.h"
   *
   * // ...
   *
   * double inData[] = {-1,-1,
   *                    -1, 0,
   *                    -1,+1,
   *                    +0,+1,
   *                    +1,+1,
   *                    +1,+0,
   *                    +1,-1,
   *                    +0,-1,
   *                    +0,+0};
   *
   * lti::dmatrix inputs(9,2,inData);     // training vectors
   *
   * int idsData[] = {1,0,2,1,0,2,1,0,1}; // and the respective ids
   * lti::ivector ids(9,idsData);
   *
   * lti::kNNClassifier knn;              // our nearest neighbor classifier
   *
   * lti::kNNClassifier::parameters param;
   * param.useReliabilityMeasure = false;
   * knn.setParameters(param);
   *
   * // we want to see some info while training
   * streamProgressInfo prog(std::cout);
   * knn.setProgressObject(prog);
   *
   * // train the classifier
   * knn.train(inputs,ids);
   *
   * // let us save our classifier for future use
   * // in the file called knn.dat
   * std::ofstream out("knn.dat");
   * lti::lispStreamHandler lsh(out);
   *
   * // save the classifier
   * knn.write(lsh);
   * // close the file
   * out.close();
   *
   * // show some results with the same training set:
   *
   * lti::kNNClassifier::outputVector outv; // here we will get some
   *                                        // classification results
   * std::cout << std::endl << "Results: " << std::endl;
   *
   * int id;
   * dvector sample(2,0.0);
   * // generate some points and check which would be the winner class
   * for (sample.at(0) = -1; sample.at(0) <= 1; sample.at(0)+=0.25) {
   *   for (sample.at(1) = -1; sample.at(1) <= 1; sample.at(1)+=0.25) {
   *
   *     knn.classify(sample,outv);
   *     std::cout << "Input " << sample << " \tOutput: ";
   *     outv.getId(outv.getWinnerUnit(),id);
   *     std::cout << id;
   *     std::cout << std::endl;
   *   }
   * }
   *
   * \endcode
   */
  class kNNClassifier : public supervisedInstanceClassifier {
  public:

    /**
     * the parameters for the class kNNClassifier
     */
    class parameters : public supervisedInstanceClassifier::parameters {
    public:

      /**
       * default constructor
       */
      parameters();

      /**
       * copy constructor
       * @param other the parameters object to be copied
       */
      parameters(const parameters& other);

      /**
       * destructor
       */
      virtual ~parameters();

      /**
       * returns name of this type
       */
      const char* getTypeName() const;

      /**
       * copy the contents of a parameters object
       * @param other the parameters object to be copied
       * @return a reference to this parameters object
       */
      parameters& copy(const parameters& other);

      /**
       * copy the contents of a parameters object
       * @param other the parameters object to be copied
       * @return a reference to this parameters object
       */
      parameters& operator=(const parameters& other);

      /**
       * returns a pointer to a clone of the parameters
       */
      virtual classifier::parameters* clone() const;

      /**
       * write the parameters in the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end
       *        will also be written, otherwise only the data block will
       *        be written.
       * @return true if write was successful
       */
      virtual bool write(ioHandler& handler,const bool complete=true) const;

      /**
       * read the parameters from the given ioHandler
       * @param handler the ioHandler to be used
       * @param complete if true (the default) the enclosing begin/end
       *        will also be read, otherwise only the data block will be
       *        read.
       * @return true if read was successful
       */
      virtual bool read(ioHandler& handler,const bool complete=true);

#     ifdef _LTI_MSC_6
      /**
       * this function is required by MSVC only, as a workaround for a
       * very awful bug, which exists since MSVC V.4.0, and still by
       * V.6.0 with all bugfixes (so called "service packs") remains
       * there...  This method is also public due to another bug, so
       * please NEVER EVER call this method directly: use read() instead
       */
      bool readMS(ioHandler& handler,const bool complete=true);

      /**
       * this function is required by MSVC only, as a workaround for a
       * very awful bug, which exists since MSVC V.4.0, and still by
       * V.6.0 with all bugfixes (so called "service packs") remains
       * there...  This method is also public due to another bug, so
       * please NEVER EVER call this method directly: use write() instead
       */
      bool writeMS(ioHandler& handler,const bool complete=true) const;
#     endif

      // ------------------------------------------------
      // the parameters
      // ------------------------------------------------

      /**
       * How many nearest neighbors should be determined per
       * classification.
       *
       * Default value: 1 (i.e. nearest neighbor classifier)
       */
      int kNN;

      /**
       * Normalize data to an equal number of data samples per class.
       *
       * The traditional k-NN classifier assumes that the a-priori
       * probability of a class is given as the number of patterns
       * belonging to the class divided by the total number of patterns.
       * In many recognition applications, however, all classes are
       * equiprobable.  If normalizeData is set to true, this second
       * alternative is chosen.  In other words, if set to true, the
       * samples will be weighted relative to the number of samples per
       * class.  If set to false, each sample has the weight 1.
       *
       * Default value: true
       */
      bool normalizeData;
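
      /*
       * Usage sketch (illustrative, not part of the original header): a
       * typical configuration voting among the 3 nearest samples while
       * treating all classes as equiprobable, using the two parameters
       * declared above.  The classifier instance is assumed to exist.
       *
       * \code
       * lti::kNNClassifier::parameters param;
       * param.kNN = 3;               // vote among the 3 nearest samples
       * param.normalizeData = true;  // each sample votes with 1/(class size)
       * knn.setParameters(param);    // assumes an lti::kNNClassifier knn
       * \endcode
       */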

      /**
       * Normalize the output vector.
       *
       * The k-nearest-neighbor algorithm counts how many elements per
       * class are present among the k points nearest to the test point.
       * This voting can be altered by the \a normalizeData parameter to
       * count not one per point, but 1/nc, where nc is the number of
       * elements of the corresponding class in the training set.
       *
       * The output can be returned "as is" by setting this parameter to
       * false, or it can be normalized to become a probability value by
       * setting this parameter to true.
       *
       * Default value: true
       */
      bool normalizeOutput;

      /**
       * @name Reliability
       */
      //@{
      /**
       * Use the reliability measure suggested by Lowe.
       *
       * Lowe suggested in his paper "Distinctive Image Features from
       * Scale Invariant Keypoints" (June 2003) the use of a reliability
       * measure for classification.  It is defined as the ratio between
       * the distance from the analyzed point \a p to the closest sample
       * point \a w, and the distance from the same point \a p to the
       * closest point that belongs to a class different from that of the
       * point \a w.
       *
       * You usually use this mode with kNN=1.  The normalization of the
       * output should be deactivated.
       *
       * Default value: false
       */
      bool useReliabilityMeasure;

      /**
       * If useReliabilityMeasure is true, then the weight of a point can
       * be determined using the ratio of two distances, but there are
       * many possibilities to consider this ratio.  Several simple modes
       * are provided here.  Let \e d1 be the distance to the winner
       * sample point and \e d2 the distance to the closest point
       * belonging to a class different from the winner's.
       */
      enum eReliabilityMode {
        Linear,      /**< The weight is determined as
                          min(1.0,((d2/d1)-1)/(threshold-1)).  The
                          threshold must be strictly greater than 1.0 */
        Exponential  /**< The weight is determined as
                          1.0 - exp(-(d2/d1 - 1)/threshold) */
      };

      /**
       * Reliability mode used.
       *
       * For possible values see eReliabilityMode.
       *
       * Default value: Linear
       */
      eReliabilityMode reliabilityMode;

      /**
       * Threshold value used for the reliability function.
       *
       * Default value: 10, i.e. distance ratios d2/d1 greater than the
       * threshold are all considered with the same weight.
       */
      double reliabilityThreshold;

      /**
       * Maximal number of neighbors considered while searching for the
       * second point, belonging to a class different from the winner's.
       * If no such point is found within this number of neighbors, a
       * "perfectly" reliable point will be assumed.
       *
       * Default value: 20
       */
      int maxUnreliableNeighborhood;

      //@}
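
      /*
       * Worked example (illustrative, not part of the original header):
       * how the two reliability modes documented above weight a vote.
       * The helpers below merely mirror the documented formulas; they
       * are not functions of this class.
       *
       * \code
       * #include <algorithm> // std::min
       * #include <cmath>     // std::exp
       *
       * // d1: distance to the winner, d2: distance to the closest point
       * // of another class, threshold: reliabilityThreshold
       * double linearWeight(double d1, double d2, double threshold) {
       *   return std::min(1.0, ((d2/d1) - 1.0) / (threshold - 1.0));
       * }
       * double exponentialWeight(double d1, double d2, double threshold) {
       *   return 1.0 - std::exp(-((d2/d1) - 1.0) / threshold);
       * }
       *
       * // e.g. d1=0.5, d2=2.0, threshold=10 (so d2/d1 = 4):
       * //   Linear:      min(1, (4-1)/9)    = 0.333...
       * //   Exponential: 1 - exp(-(4-1)/10) = 0.259...
       * \endcode
       */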

      /**
       * @name Nearest neighbor search options
       *
       * The search is optimized using a kd-tree.  The parameters in this
       * group control how the tree is organized or how to search for the
       * data therein.
       */
      //@{
      /**
       * Best-bin-first.
       *
       * If this parameter is set to true, the best-bin-first (BBF)
       * algorithm of Lowe et al. will be applied.  It is an approximate
       * algorithm appropriate for spaces of relatively high
       * dimensionality (around 100), in which some improbable bins are
       * discarded in the search.
       *
       * Note that if you activate this, the result is possibly, but not
       * necessarily, the optimal one.
       *
       * Default value: false
       */
      bool bestBinFirst;

      /**
       * Maximum number of leaf-node visits allowed.
       *
       * This is only required if you specify a best-bin-first (BBF)
       * search.  It is the maximal number of visits allowed for leaf
       * nodes (in the original paper it is called Emax).
       *
       * Usually this value depends on many factors.  You can set it as a
       * percentage of the expected number of leaf nodes (approximately
       * the number of points divided by the bucketSize).
       *
       * Default value: 100, but you should tune it for BBF to work
       * appropriately.
       */
      int eMax;

      /**
       * Bucket size.
       *
       * Each node of the kd-tree can contain several points.  The search
       * within a node is done with linear search, i.e. the brute force
       * method of computing the distances to each single point.  This
       * parameter indicates the number of points that will be stored in
       * a node.  The name "bucket" comes from the original kd-tree
       * paper.
       *
       * Default value: 5
       */
      int bucketSize;
      //@}
    };
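
    /*
     * Configuration sketch (illustrative, not part of the original
     * header): enabling the approximate best-bin-first search and
     * deriving eMax from the expected number of leaf nodes, as the
     * documentation of eMax above suggests.  The training-set size is an
     * assumed figure.
     *
     * \code
     * lti::kNNClassifier::parameters param;
     * param.bestBinFirst = true;    // approximate BBF search
     * param.bucketSize   = 16;      // larger buckets for high dimensions
     * const int numPoints = 100000; // assumed training-set size
     * // expected leaves ~ numPoints/bucketSize; visit about 10% of them
     * param.eMax = (numPoints / param.bucketSize) / 10;
     * \endcode
     */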

    /**
     * default constructor
     */
    kNNClassifier();

    /**
     * copy constructor
     * @param other the object to be copied
     */
    kNNClassifier(const kNNClassifier& other);

    /**
     * destructor
     */
    virtual ~kNNClassifier();

    /**
     * returns the name of this type ("kNNClassifier")
     */
    virtual const char* getTypeName() const;

    /**
     * copy data of "other" classifier.
     * @param other the classifier to be copied
     * @return a reference to this classifier object
     */
    kNNClassifier& copy(const kNNClassifier& other);

    /**
     * alias for copy member
     * @param other the classifier to be copied
     * @return a reference to this classifier object
     */
    kNNClassifier& operator=(const kNNClassifier& other);

    /**
     * returns a pointer to a clone of this classifier.
     */
    virtual classifier* clone() const;

    /**
     * returns used parameters
     */
    const kNNClassifier::parameters& getParameters() const;

    /**
     * Supervised training.  The vectors in the <code>input</code> matrix
     * are arranged row-wise, i.e. each row contains one data vector.
     * The <code>ids</code> vector contains the class label for each row.
     *
     * This is an alternative method to trainObject().  You cannot add
     * further objects after you have called train(), nor can you call
     * train() after calling trainObject(), since all data provided with
     * trainObject() would be removed.  In other words, you must decide
     * whether you want to supply all objects separately or all of them
     * simultaneously, but you cannot combine both methods.
     *
     * As the id of each feature point the index of the corresponding
     * matrix row will be used.
     *
     * @param input the matrix with the input vectors (each row is a
     *              training vector)
     * @param ids vector of class ids for each input point
     *
     * @return true if successful, false otherwise.  (if false you can
     *         check the error message with getStatusString())
     */
    virtual bool train(const dmatrix& input,
                       const ivector& ids);

    /**
     * Supervised training.  The vectors in the <code>input</code> matrix
     * are arranged row-wise, i.e. each row contains one data vector.
     * The <code>ids</code> vector contains the class label for each row.
     *
     * This is an alternative method to trainObject().  You cannot add
     * further objects after you have called train(), nor can you call
     * train() after calling trainObject(), since all data provided with
     * trainObject() would be removed.  In other words, you must decide
     * whether you want to supply all objects separately or all of them
     * simultaneously, but you cannot combine both methods.
     *
     * As the id of each feature point the corresponding entry of
     * \a pointIds will be used.
     *
     * @param input the matrix with the input vectors (each row is a
     *              training vector)
     * @param ids vector of class ids for each input point
     * @param pointIds vector containing the id of each single feature
     *                 point of the training set.
     *
     * @return true if successful, false otherwise.  (if false you can
     *         check the error message with getStatusString())
     */
    virtual bool train(const dmatrix& input,
                       const ivector& ids,
                       const ivector& pointIds);

    /**
     * Adds an object to this classifier.  The id is given automatically
     * and returned in the parameter.
     *
     * After you have trained several objects, you must call the build()
     * method to finish the training process.  If you don't, the
     * classifier will ignore everything you have provided.
     *
     * This is an alternative method to train().  You cannot add further
     * objects after you have called train(), nor can you call train()
     * after calling this method, since all data provided with
     * trainObject() would be removed.  In other words, you must decide
     * whether you want to supply all objects separately or all of them
     * simultaneously, but you cannot combine both methods.
     *
     * Note that the difference with the method trainObjectId() is that
     * here you receive as a returned argument the id assigned to the
     * object, while with trainObjectId() you decide which id should be
     * used for the given object.
     *
     * As the id of each point in the given matrix the row index plus the
     * number of points trained so far will be used, i.e. the successive
     * numbering of the sample points is simply continued.
     *
     * @param input each row of this matrix represents a point in the
     *              feature space belonging to one single class.
     * @param id this id will be used for the class represented by the
     *           points in the \a input matrix.
     * @return true if successful, false otherwise.
     */
    virtual bool trainObject(const dmatrix& input, int& id);
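
    /*
     * Usage sketch (illustrative, not part of the original header): the
     * incremental training mode with trainObject() followed by the
     * mandatory build().  The sample matrices are assumed to exist.
     *
     * \code
     * lti::kNNClassifier knn;
     * int idA, idB;
     * knn.trainObject(samplesA, idA); // assumed dmatrix with class-A points
     * knn.trainObject(samplesB, idB); // assumed dmatrix with class-B points
     * knn.build();                    // mandatory to finish this mode
     * \endcode
     */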

    /**
     * Adds an object to this classifier.  The id is given automatically
     * and returned in the parameter.
     *
     * After you have trained several objects, you must call the build()
     * method to finish the training process.  If you don't, the
     * classifier will ignore everything you have provided.
     *
     * This is an alternative method to train().  You cannot add further
     * objects after you have called train(), nor can you call train()
     * after calling this method, since all data provided with
     * trainObject() would be removed.  In other words, you must decide
     * whether you want to supply all objects separately or all of them
     * simultaneously, but you cannot combine both methods.
     *
     * Note that the difference with the method trainObjectId() is that
     * here you receive as a returned argument the id assigned to the
     * object, while with trainObjectId() you decide which id should be
     * used for the given object.
     *
     * @param input each row of this matrix represents a point in the
     *              feature space belonging to one single class.
     * @param id this id will be used for the class represented by the
     *           points in the \a input matrix.
     * @param pointIds each point in the \a input matrix will have its
     *                 own id, given by the entries of this vector, which
     *                 must have a size equal to the number of rows of
     *                 \a input.
     *
     * @return true if successful, false otherwise.
     */
    virtual bool trainObject(const dmatrix& input,
                             int& id,
                             const ivector& pointIds);

    /**
     * Adds an object to this classifier.  The object id is given by the
     * user.
     *
     * After you have trained several objects, you must call the build()
     * method to finish the training process.  If you don't, the
     * classifier will ignore everything you have provided.
     *
     * This is an alternative method to train().  You cannot add further
     * objects after you have called train(), nor can you call train()
     * after calling this method, since all data provided with
     * trainObject() would be removed.  In other words, you must decide
     * whether you want to supply all objects separately or all of them
     * simultaneously, but you cannot combine both methods.
     *
     * Note that the difference with the method trainObject() is that
     * here you directly provide the id to be used for the object, while
     * trainObject() returns an id that is computed automatically.
     *
     * As the id of each point in the given matrix the row index plus the
     * number of points trained so far will be used, i.e. the successive
     * numbering of the sample points is simply continued.
     *
     * @param input each row of this matrix represents a point in the
     *              feature space belonging to one single class.
     * @param id this id will be used for the class represented by the
     *           points in the \a input matrix.
     * @return true if successful, false otherwise.
     */
    virtual bool trainObjectId(const dmatrix& input,
                               const int id);
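
    /*
     * Usage sketch (illustrative, not part of the original header): the
     * same incremental training as with trainObject(), but with
     * user-chosen class ids.  The sample matrices are assumed to exist.
     *
     * \code
     * lti::kNNClassifier knn;
     * knn.trainObjectId(samplesA, 42); // class id 42 chosen by the user
     * knn.trainObjectId(samplesB,  7); // class id 7
     * knn.build();                     // mandatory to finish this mode
     * \endcode
     */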

    /**
     * Adds an object to this classifier.  The object id is given by the
     * user.
     *
     * After you have trained several objects, you must call the build()
     * method to finish the training process.  If you don't, the
     * classifier will ignore everything you have provided.
     *
     * This is an alternative method to train().  You cannot add further
     * objects after you have called train(), nor can you call train()
     * after calling this method, since all data provided with
     * trainObject() would be removed.  In other words, you must decide
     * whether you want to supply all objects separately or all of them
     * simultaneously, but you cannot combine both methods.
     *
     * Note that the difference with the method trainObject() is that
     * here you directly provide the id to be used for the object, while
     * trainObject() returns an id that is computed automatically.
     *
     * @param input each row of this matrix represents a point in the
     *              feature space belonging to one single class.
     * @param id this id will be used for the class represented by the
     *           points in the \a input matrix.
     * @param pointIds each point in the \a input matrix will have its
     *                 own id, given by the entries of this vector, which
     *                 must have the same size as the number of rows of
     *                 \a input.
     * @return true if successful, false otherwise.
     */
    virtual bool trainObjectId(const dmatrix& input,
                               const int id,
                               const ivector& pointIds);

    /**
     * Classification.
     *
     * Classifies the feature and returns the outputVector %object with
     * the classification result.
     *
     * <b>NOTE:</b> This method is NOT really const.  Although the main
     * members of the kNNClassifier are not changed, some state variables
     * used for efficiency are.  Thus, it is not safe to use the same
     * instance of the kNNClassifier in two different threads.
     *
     * @param feature pattern to be classified
     * @param result of the classification as a classifier::outputVector
     * @return true if the classification has been successful
     */
    virtual bool classify(const dvector& feature,
                          outputVector& result) const;

    /**
     * Classification.
     *
     * Classifies all features (the rows of the matrix) and returns the
     * outputVector %object with the classification result.
     *
     * The classification will be the accumulation of the votes for all
     * given points, assuming that they all belong to the same class.
     *
     * <b>NOTE:</b> This method is NOT really const.  Although the main
     * members of the kNNClassifier are not changed, some state variables
     * used for efficiency are.  Thus, it is not safe to use the same
     * instance of the kNNClassifier in two different threads.
     *
     * @param features patterns to be classified, each row is one feature
     * @param result of the classification as a classifier::outputVector
     * @return true if the classification has been successful
     */
    virtual bool classify(const dmatrix& features,
                          outputVector& result) const;
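
    /*
     * Usage sketch (illustrative, not part of the original header):
     * classifying several local descriptors of the same unknown object
     * at once; the votes of all rows are accumulated into one result.
     * The matrix dimensions are assumed figures, and the classifier is
     * assumed to be trained.
     *
     * \code
     * lti::dmatrix localFeatures(10,64); // assumed: 10 descriptors, 64-D
     * // ... fill localFeatures ...
     * lti::kNNClassifier::outputVector outv;
     * int winnerId;
     * knn.classify(localFeatures, outv);          // accumulate all votes
     * outv.getId(outv.getWinnerUnit(), winnerId); // accumulated winner
     * \endcode
     */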

    /**
     * Classification.
     *
     * Classifies all features (the rows of the matrix) and returns for
     * each of them a vector of unnormalized probabilities, coded in the
     * rows of the \a result matrix.
     *
     * Since no classifier::outputVector is constructed, only the
     * classification "raw data" is produced.
     *
     * This method is used in recognition tasks based on many local
     * hints, for which the individual classification of each feature
     * vector would cost too much time.
     *
     * Each column of the output matrix represents one object.  To get
     * the id represented by a column you can use the outputTemplate of
     * the classifier:
     *
     * \code
     * kNNClassifier knn;
     *
     * knn.train(data);
     *
     * int columnId = knn.getOutputTemplate().getIds().at(columnNumber);
     * \endcode
     *
     * or the shortcut method of this class, getColumnId().
     *
     * <b>NOTE:</b> This method is NOT really const.  Although the main
     * members of the kNNClassifier are not changed, some state variables
     * used for efficiency are.  Thus, it is not safe to use the same
     * instance of the kNNClassifier in two different threads.
     *
     * @param features patterns to be classified, each row is one feature
     * @param result of the classification as a matrix
     * @return true if the classification has been successful
     */
    virtual bool classify(const dmatrix& features,
                          dmatrix& result) const;

    /**
     * Shortcut method to comfortably access the object id for a column
     * of the result matrix of the classify(const dmatrix&,dmatrix&)
     * method.
     *
     * It returns a negative value if the given column index is invalid.
     */
    inline int getColumnId(const int columnId) const {
      if (static_cast<uint32>(columnId)<static_cast<uint32>(nClasses)) {
        return getOutputTemplate().getIds().at(columnId);
      }
      return -1;
    }

    /**
     * Information about a feature point.
     *
     * If you classify a point in the feature space and you are
     * interested in all available information about this point, this
     * structure type will be used.
     */
    struct pointInfo {
      /**
       * Default constructor.
       *
       * Initializes all members with invalid values (negative ids and
       * null pointers)
       */
      pointInfo();

      /**
       * Constant reference to the feature point
       */
      const dvector* point;

      /**
       * Class id for the point
       */
      int classId;

      /**
       * Id for the point itself
       */
      int pointId;

      /**
       * Distance to the test point
       */
      double distance;
    };

    /**
     * Classification.
     *
     * Classifies the feature and returns the outputVector %object with
     * the classification result.
     *
     * <b>NOTE:</b> This method is NOT really const.  Although the main
     * members of the kNNClassifier are not changed, some state variables
     * used for efficiency are.  Thus, it is not safe to use the same
     * instance of the kNNClassifier in two different threads.
     *
     * @param feature pattern to be classified
     * @param result of the classification as a classifier::outputVector
     * @param points vector sorted in increasing order of the distances
     *               to the feature point and containing two id numbers:
     *               the first one corresponds to the class id, the
     *               second one to the point id.  A const pointer to the
     *               feature point of the training set and the distance
     *               to that point are also contained in the pointInfo
     *               structure.
     * @return true if the classification has been successful
     */
    virtual bool classify(const dvector& feature,
                          outputVector& result,
                          std::vector<pointInfo>& points) const;
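
    /*
     * Usage sketch (illustrative, not part of the original header):
     * inspecting the nearest training samples returned through the
     * pointInfo vector of the method declared above.  The classifier is
     * assumed to be trained.
     *
     * \code
     * lti::dvector sample(2,0.0);            // a test point
     * lti::kNNClassifier::outputVector outv;
     * std::vector<lti::kNNClassifier::pointInfo> nn;
     * if (knn.classify(sample, outv, nn)) {
     *   for (unsigned int i=0; i<nn.size(); ++i) {
     *     std::cout << "class "     << nn[i].classId
     *               << ", point "    << nn[i].pointId
     *               << ", distance " << nn[i].distance << std::endl;
     *   }
     * }
     * \endcode
     */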

    /**
     * Get only the nearest point to the given vector.
     *
     * Sometimes it is not necessary to have the probability distribution
     * for the objects computed with the classify() methods; only the
     * nearest point may be of interest.  This method provides an
     * efficient way to search for just the nearest point and obtain its
     * data.
     *
     * @param feature reference multidimensional point
     * @param nearestPoint nearest point in the training set to the
     *                     presented point.
     * @return true if the search was successful, false otherwise.
     */
    virtual bool nearest(const dvector& feature,
                         pointInfo& nearestPoint) const;
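
    /*
     * Usage sketch (illustrative, not part of the original header):
     * retrieving only the single nearest training sample.
     *
     * \code
     * lti::kNNClassifier::pointInfo best;
     * if (knn.nearest(sample, best)) { // assumes a dvector sample
     *   std::cout << "class " << best.classId
     *             << " at distance " << best.distance << std::endl;
     * }
     * \endcode
     */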

    /**
     * write the classifier in the given ioHandler
     * @param handler the ioHandler to be used
     * @param complete if true (the default) the enclosing begin/end will
     *        also be written, otherwise only the data block will be
     *        written.
     * @return true if write was successful
     */
    virtual bool write(ioHandler& handler,const bool complete=true) const;

    /**
     * read the classifier from the given ioHandler
     * @param handler the ioHandler to be used
     * @param complete if true (the default) the enclosing begin/end will
     *        also be read, otherwise only the data block will be read.
     * @return true if read was successful
     */
    virtual bool read(ioHandler& handler,const bool complete=true);

    /**
     * Finish a training process.
     *
     * If you used the methods trainObject() or trainObjectId(), you must
     * call this method in order to complete the training process.
     *
     * If you used one of the train() methods, you must avoid calling
     * this method.
     *
     * Remember that the "incremental" training mode with trainObject()
     * or trainObjectId() cannot be combined with an "at-once" training
     * step using the method train().
     *
     * @see trainObject(), trainObjectId(), reset()
     */
    void build();

    /**
     * Resets all values and deletes the content.
     *
     * If you want to forget the sample points and start giving new
     * points with trainObject(), you need to call this method first.
     */
    void clear();

  protected:

    /**
     * @name Reliability weighting functions
     */
    //@{
    static double linear(const double& ratio,const double& threshold);
    static double exponential(const double& ratio,const double& threshold);
    //@}

    /**
     * mapping of the ids
     */
    void buildIdMaps(const ivector& ids);

    /**
     * Define the output template
     */
    void defineOutputTemplate();

    /**
     * Type for maps mapping ids from internal to external and vice versa
     */
    typedef std::map<int,int> idMap_type;

    /**
     * Map from external id to internal id, used for training
     */
    idMap_type idMap;

    /**
     * Map from internal id to external id, used for training
     */
    idMap_type rIdMap;

    /**
     * Number of classes currently in the classifier
     */
    int nClasses;

    /**
     * Exact kd-tree type used for the database.
     *
     * The data in the tree is composed of two id numbers:
     * - The first component contains the object or class id.
     * - The second component contains the point id, since with kNN it
     *   can be necessary to know exactly which points in the training
     *   set were the winners.
     */
    typedef kdTree< dvector, std::pair<int,int> > treeType;

    /**
     * The database with accelerated nearest neighbor search.
     *
     * The kdTree uses dvectors as n-dimensional points and requires as
     * data a std::pair containing the class id and the point id.
     */
    treeType databaseTree;

    /**
     * Optionally, a scalar weight for each class can be applied as an
     * a-priori value.
     *
     * The std::vector is used due to its push_back interface.
     *
     * It is accessed with the internal id.
     */
    std::vector<double> classWeight;

    /**
     * Minimum number of points per class.
     *
     * This attribute is valid only after the complete training process.
     */
    int minPointsPerClass;

    /**
     * Maximum number of points per class.
     *
     * This attribute is valid only after the complete training process.
     */
    int maxPointsPerClass;

    /**
     * Helper for classification
     */
    bool classify(const dvector& feature,
                  outputVector& output,
                  std::multimap<double,treeType::element*>& resList) const;

  };

}

#endif