mlpack  2.0.1
hoeffding_numeric_split.hpp
Go to the documentation of this file.
1 
16 #ifndef __MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_NUMERIC_SPLIT_HPP
17 #define __MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_NUMERIC_SPLIT_HPP
18 
19 #include <mlpack/core.hpp>
20 #include "numeric_split_info.hpp"
21 
22 namespace mlpack {
23 namespace tree {
24 
53 template<typename FitnessFunction,
54  typename ObservationType = double>
56 {
57  public:
60 
70  HoeffdingNumericSplit(const size_t numClasses,
71  const size_t bins = 10,
72  const size_t observationsBeforeBinning = 100);
73 
78  HoeffdingNumericSplit(const size_t numClasses,
79  const HoeffdingNumericSplit& other);
80 
90  void Train(ObservationType value, const size_t label);
91 
104  void EvaluateFitnessFunction(double& bestFitness, double& secondBestFitness)
105  const;
106 
108  size_t NumChildren() const { return bins; }
109 
114  void Split(arma::Col<size_t>& childMajorities, SplitInfo& splitInfo) const;
115 
117  size_t MajorityClass() const;
119  double MajorityProbability() const;
120 
122  size_t Bins() const { return bins; }
123 
125  template<typename Archive>
126  void Serialize(Archive& ar, const unsigned int /* version */);
127 
128  private:
130  arma::Col<ObservationType> observations;
132  arma::Col<size_t> labels;
133 
135  arma::Col<ObservationType> splitPoints;
137  size_t bins;
141  size_t samplesSeen;
142 
144  arma::Mat<size_t> sufficientStatistics;
145 };
146 
148 template<typename FitnessFunction>
149 using HoeffdingDoubleNumericSplit = HoeffdingNumericSplit<FitnessFunction,
150  double>;
151 
152 } // namespace tree
153 } // namespace mlpack
154 
155 // Include implementation.
156 #include "hoeffding_numeric_split_impl.hpp"
157 
158 #endif
void EvaluateFitnessFunction(double &bestFitness, double &secondBestFitness) const
Evaluate the fitness function given what has been calculated so far.
size_t samplesSeen
The number of samples we have seen so far.
double MajorityProbability() const
Return the probability of the majority class.
Linear algebra utility functions, generally performed on matrices or vectors.
void Split(arma::Col< size_t > &childMajorities, SplitInfo &splitInfo) const
Return the majority class of each child to be created, if a split on this dimension was performed.
size_t observationsBeforeBinning
The number of observations we must see before binning.
The HoeffdingNumericSplit class implements the numeric feature splitting strategy alluded to by Domingos and Hulten in their paper on Hoeffding trees ("Mining High-Speed Data Streams").
void Train(ObservationType value, const size_t label)
Train the HoeffdingNumericSplit on the given observed value (remember that this object only cares about the information for a single feature, not an entire point).
NumericSplitInfo< ObservationType > SplitInfo
The splitting information type required by the HoeffdingNumericSplit.
void Serialize(Archive &ar, const unsigned int)
Serialize the object.
size_t MajorityClass() const
Return the majority class.
size_t Bins() const
Return the number of bins.
arma::Col< ObservationType > splitPoints
The split points for the binning (length bins - 1).
Include all of the base components required to write MLPACK methods, and the main MLPACK Doxygen documentation.
arma::Col< ObservationType > observations
Before binning, this holds the points we have seen so far.
HoeffdingNumericSplit(const size_t numClasses, const size_t bins=10, const size_t observationsBeforeBinning=100)
Create the HoeffdingNumericSplit class, and specify some basic parameters about how the binning should take place.
size_t NumChildren() const
Return the number of children if this node splits on this feature.
arma::Col< size_t > labels
This holds the labels of the points before binning.
arma::Mat< size_t > sufficientStatistics
After binning, this contains the sufficient statistics.