GClasses
|
The k-Nearest Neighbor learning algorithm. More...
#include <GKNN.h>
Public Types | |
enum | InterpolationMethod { Linear, Mean, Learner } |
Public Member Functions | |
GKNN (GRand &rand) | |
Constructs a k-nearest-neighbor learner. The number of neighbors to evaluate in order to make a prediction is specified with setNeighborCount. | |
GKNN (GDomNode *pNode, GLearnerLoader &ll) | |
Load from a DOM. | |
virtual | ~GKNN () |
size_t | neighborCount () |
Returns the number of neighbors. | |
void | setNeighborCount (size_t k) |
Specify the number of neighbors to use. (The default is 1.) | |
virtual GDomNode * | serialize (GDom *pDoc) |
Marshal this object into a DOM, which can then be converted to a variety of serial formats. | |
virtual void | trainSparse (GSparseMatrix &features, GMatrix &labels) |
See the comment for GIncrementalLearner::trainSparse. | |
virtual void | clear () |
Discard any training (but not any settings) so it can be trained again. | |
void | setMetric (GRowDistanceScaled *pMetric, bool own) |
Sets the distance metric to use for finding neighbors. If own is true, then this object will delete pMetric when it is done with it. | |
void | setMetric (GSparseSimilarity *pMetric, bool own) |
Sets the sparse similarity metric to use for finding neighbors. If own is true, then this object will delete pMetric when it is done with it. | |
void | setInterpolationMethod (InterpolationMethod eMethod) |
Sets the technique for interpolation. (If you want to use the "Learner" method, you should call setInterpolationLearner instead of this method.) | |
void | setInterpolationLearner (GSupervisedLearner *pLearner, bool bTakeOwnership) |
Sets the interpolation method to "Learner" and sets the learner to use. If bTakeOwnership is true, it will delete the learner when this object is deleted. | |
size_t | addVector (const double *pIn, const double *pOut) |
Adds a copy of the vector given by pIn and pOut to the internal set. | |
void | setElbowRoom (double d) |
Sets the value for elbow room. (This value is only used with incremental training.) | |
GRowDistanceScaled * | metric () |
Returns the dissimilarity metric. | |
void | setOptimizeScaleFactors (bool b) |
If you set this to true, it will use a hill-climber to optimize the attribute scaling factors. If you set it to false (the default), it won't. | |
GMatrix * | features () |
Returns the internal feature set. | |
GSparseMatrix * | sparseFeatures () |
Returns the internal set of sparse features. | |
GMatrix * | labels () |
Returns the internal label set. | |
void | autoTune (GMatrix &features, GMatrix &labels) |
Uses cross-validation to find a set of parameters that works well with the provided data. | |
Static Public Member Functions | |
static void | test () |
Performs unit tests for this class. Throws an exception if there is a failure. | |
Protected Member Functions | |
virtual void | trainInner (GMatrix &features, GMatrix &labels) |
See the comment for GSupervisedLearner::trainInner. | |
virtual void | predictInner (const double *pIn, double *pOut) |
See the comment for GSupervisedLearner::predictInner. | |
virtual void | predictDistributionInner (const double *pIn, GPrediction *pOut) |
See the comment for GSupervisedLearner::predictDistributionInner. | |
virtual void | beginIncrementalLearningInner (sp_relation &pFeatureRel, sp_relation &pLabelRel) |
See the comment for GIncrementalLearner::beginIncrementalLearningInner. | |
virtual void | trainIncrementalInner (const double *pIn, const double *pOut) |
Adds a vector to the internal set. Also, if the (k+1)th nearest neighbor of that vector is less than "elbow room" from it, then the closest neighbor is deleted from the internal set. (You might be wondering why the decision to delete the closest neighbor is determined by the distance of the (k+1)th neighbor. This enables a clump of k points to form in the most frequently sampled locations. Also, if you make this decision based on a closer neighbor, then big holes may form in the model if points are sampled in a poor order.) Call setElbowRoom to specify the elbow room distance. | |
void | findNeighbors (const double *pVector) |
Finds the nearest neighbors of pVector. | |
void | interpolateMean (const double *pIn, GPrediction *pOut, double *pOut2) |
Interpolate with each neighbor having equal vote. | |
void | interpolateLinear (const double *pIn, GPrediction *pOut, double *pOut2) |
Interpolate with each neighbor having a linear vote. (Actually it's linear with respect to the squared distance instead of the distance, because this is faster to compute.) | |
void | interpolateLearner (const double *pIn, GPrediction *pOut, double *pOut2) |
Interpolates with the provided supervised learning algorithm. | |
virtual bool | canImplicitlyHandleMissingFeatures () |
See the comment for GTransducer::canImplicitlyHandleMissingFeatures. | |
Protected Attributes | |
GMatrix * | m_pFeatures |
GSparseMatrix * | m_pSparseFeatures |
GMatrix * | m_pLabels |
size_t | m_nNeighbors |
InterpolationMethod | m_eInterpolationMethod |
GSupervisedLearner * | m_pLearner |
bool | m_bOwnLearner |
double | m_dElbowRoom |
bool | m_optimizeScaleFactors |
GRowDistanceScaled * | m_pDistanceMetric |
GSparseSimilarity * | m_pSparseMetric |
bool | m_ownMetric |
GKnnScaleFactorCritic * | m_pCritic |
GOptimizer * | m_pScaleFactorOptimizer |
size_t * | m_pEvalNeighbors |
double * | m_pEvalDistances |
double * | m_pValueCounts |
GNeighborFinderGeneralizing * | m_pNeighborFinder |
GNeighborFinderGeneralizing * | m_pNeighborFinder2 |
The k-Nearest Neighbor learning algorithm.
GClasses::GKNN::GKNN | ( | GRand & | rand | ) |
Constructs a k-nearest-neighbor learner. The number of neighbors to evaluate in order to make a prediction is specified with setNeighborCount.
GClasses::GKNN::GKNN | ( | GDomNode * | pNode, |
GLearnerLoader & | ll | ||
) |
Load from a DOM.
virtual GClasses::GKNN::~GKNN | ( | ) | [virtual] |
size_t GClasses::GKNN::addVector | ( | const double * | pIn, |
const double * | pOut | ||
) |
Adds a copy of the vector given by pIn and pOut to the internal set.
void GClasses::GKNN::autoTune | ( | GMatrix & | features, |
GMatrix & | labels | ||
) |
Uses cross-validation to find a set of parameters that works well with the provided data.
virtual void GClasses::GKNN::beginIncrementalLearningInner | ( | sp_relation & | pFeatureRel, |
sp_relation & | pLabelRel | ||
) | [protected, virtual] |
See the comment for GIncrementalLearner::beginIncrementalLearningInner.
Implements GClasses::GIncrementalLearner.
virtual bool GClasses::GKNN::canImplicitlyHandleMissingFeatures | ( | ) | [inline, protected, virtual] |
See the comment for GTransducer::canImplicitlyHandleMissingFeatures.
Reimplemented from GClasses::GTransducer.
virtual void GClasses::GKNN::clear | ( | ) | [virtual] |
Discard any training (but not any settings) so it can be trained again.
Implements GClasses::GSupervisedLearner.
GMatrix* GClasses::GKNN::features | ( | ) | [inline] |
Returns the internal feature set.
void GClasses::GKNN::findNeighbors | ( | const double * | pVector | ) | [protected] |
Finds the nearest neighbors of pVector.
void GClasses::GKNN::interpolateLearner | ( | const double * | pIn, |
GPrediction * | pOut, | ||
double * | pOut2 | ||
) | [protected] |
Interpolates with the provided supervised learning algorithm.
void GClasses::GKNN::interpolateLinear | ( | const double * | pIn, |
GPrediction * | pOut, | ||
double * | pOut2 | ||
) | [protected] |
Interpolate with each neighbor having a linear vote. (Actually it's linear with respect to the squared distance instead of the distance, because this is faster to compute.)
void GClasses::GKNN::interpolateMean | ( | const double * | pIn, |
GPrediction * | pOut, | ||
double * | pOut2 | ||
) | [protected] |
Interpolate with each neighbor having equal vote.
GMatrix* GClasses::GKNN::labels | ( | ) | [inline] |
Returns the internal label set.
GRowDistanceScaled* GClasses::GKNN::metric | ( | ) | [inline] |
Returns the dissimilarity metric.
size_t GClasses::GKNN::neighborCount | ( | ) | [inline] |
Returns the number of neighbors.
virtual void GClasses::GKNN::predictDistributionInner | ( | const double * | pIn, |
GPrediction * | pOut | ||
) | [protected, virtual] |
See the comment for GSupervisedLearner::predictDistributionInner.
Implements GClasses::GSupervisedLearner.
virtual void GClasses::GKNN::predictInner | ( | const double * | pIn, |
double * | pOut | ||
) | [protected, virtual] |
See the comment for GSupervisedLearner::predictInner.
Implements GClasses::GSupervisedLearner.
virtual GDomNode* GClasses::GKNN::serialize | ( | GDom * | pDoc | ) | [virtual] |
Marshal this object into a DOM, which can then be converted to a variety of serial formats.
Implements GClasses::GSupervisedLearner.
void GClasses::GKNN::setElbowRoom | ( | double | d | ) | [inline] |
Sets the value for elbow room. (This value is only used with incremental training.)
void GClasses::GKNN::setInterpolationLearner | ( | GSupervisedLearner * | pLearner, |
bool | bTakeOwnership | ||
) |
Sets the interpolation method to "Learner" and sets the learner to use. If bTakeOwnership is true, it will delete the learner when this object is deleted.
void GClasses::GKNN::setInterpolationMethod | ( | InterpolationMethod | eMethod | ) |
Sets the technique for interpolation. (If you want to use the "Learner" method, you should call setInterpolationLearner instead of this method.)
void GClasses::GKNN::setMetric | ( | GSparseSimilarity * | pMetric, |
bool | own | ||
) |
Sets the sparse similarity metric to use for finding neighbors. If own is true, then this object will delete pMetric when it is done with it.
void GClasses::GKNN::setMetric | ( | GRowDistanceScaled * | pMetric, |
bool | own | ||
) |
Sets the distance metric to use for finding neighbors. If own is true, then this object will delete pMetric when it is done with it.
void GClasses::GKNN::setNeighborCount | ( | size_t | k | ) |
Specify the number of neighbors to use. (The default is 1.)
void GClasses::GKNN::setOptimizeScaleFactors | ( | bool | b | ) |
If you set this to true, it will use a hill-climber to optimize the attribute scaling factors. If you set it to false (the default), it won't.
GSparseMatrix* GClasses::GKNN::sparseFeatures | ( | ) | [inline] |
Returns the internal set of sparse features.
static void GClasses::GKNN::test | ( | ) | [static] |
Performs unit tests for this class. Throws an exception if there is a failure.
Reimplemented from GClasses::GSupervisedLearner.
virtual void GClasses::GKNN::trainIncrementalInner | ( | const double * | pIn, |
const double * | pOut | ||
) | [protected, virtual] |
Adds a vector to the internal set. Also, if the (k+1)th nearest neighbor of that vector is less than "elbow room" from it, then the closest neighbor is deleted from the internal set. (You might be wondering why the decision to delete the closest neighbor is determined by the distance of the (k+1)th neighbor. This enables a clump of k points to form in the most frequently sampled locations. Also, if you make this decision based on a closer neighbor, then big holes may form in the model if points are sampled in a poor order.) Call setElbowRoom to specify the elbow room distance.
Implements GClasses::GIncrementalLearner.
virtual void GClasses::GKNN::trainInner | ( | GMatrix & | features, |
GMatrix & | labels | ||
) | [protected, virtual] |
See the comment for GSupervisedLearner::trainInner.
Implements GClasses::GSupervisedLearner.
virtual void GClasses::GKNN::trainSparse | ( | GSparseMatrix & | features, |
GMatrix & | labels | ||
) | [virtual] |
See the comment for GIncrementalLearner::trainSparse.
Implements GClasses::GIncrementalLearner.
bool GClasses::GKNN::m_bOwnLearner [protected] |
double GClasses::GKNN::m_dElbowRoom [protected] |
size_t GClasses::GKNN::m_nNeighbors [protected] |
bool GClasses::GKNN::m_optimizeScaleFactors [protected] |
bool GClasses::GKNN::m_ownMetric [protected] |
GKnnScaleFactorCritic* GClasses::GKNN::m_pCritic [protected] |
GRowDistanceScaled* GClasses::GKNN::m_pDistanceMetric [protected] |
double* GClasses::GKNN::m_pEvalDistances [protected] |
size_t* GClasses::GKNN::m_pEvalNeighbors [protected] |
GMatrix* GClasses::GKNN::m_pFeatures [protected] |
GMatrix* GClasses::GKNN::m_pLabels [protected] |
GSupervisedLearner* GClasses::GKNN::m_pLearner [protected] |
GOptimizer* GClasses::GKNN::m_pScaleFactorOptimizer [protected] |
GSparseMatrix* GClasses::GKNN::m_pSparseFeatures [protected] |
GSparseSimilarity* GClasses::GKNN::m_pSparseMetric [protected] |
double* GClasses::GKNN::m_pValueCounts [protected] |