SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
C45ClassifierTree.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Parijat Mazumdar
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 
32 #ifndef _C45CLASSIFIERTREE_H__
33 #define _C45CLASSIFIERTREE_H__
34 
35 #include <shogun/lib/config.h>
36 
40 
41 namespace shogun
42 {
43 
/**
 * Declaration of CC45ClassifierTree, a tree machine publicly derived from
 * CTreeMachine<C45TreeNodeData>. The symbol index of this page describes the
 * class as implementing the C4.5 algorithm for decision tree learning.
 *
 * NOTE(review): the leading integer on every line appears to be the line
 * number of the original header carried over from the rendered listing. The
 * gaps in that numbering suggest that documentation comments, and possibly
 * some declarations, were dropped - confirm against the real header.
 */
75 class CC45ClassifierTree : public CTreeMachine<C45TreeNodeData>
76 {
77 public:
80 
  /** Virtual destructor. */
82  virtual ~CC45ClassifierTree();
83 
  /**
   * @return the literal string "C45ClassifierTree"
   */
87  virtual const char* get_name() const { return "C45ClassifierTree"; }
88 
  /**
   * Produces multiclass labels for a set of features.
   *
   * @param data features to classify; defaults to NULL
   *        (NOTE(review): behaviour for NULL is not visible here - confirm
   *        in the implementation)
   * @return pointer to the resulting multiclass labels
   */
93  virtual CMulticlassLabels* apply_multiclass(CFeatures* data=NULL);
94 
  /**
   * Prunes the tree using a validation set.
   *
   * @param validation_data dense float64_t features used for validation
   * @param validation_labels multiclass labels paired with validation_data
   * @param epsilon defaults to 0 (NOTE(review): presumably the tolerance
   *        applied when deciding whether to prune - confirm)
   */
113  void prune_tree(CDenseFeatures<float64_t>* validation_data, CMulticlassLabels* validation_labels, float64_t epsilon=0.f);
114 
  /*
   * NOTE(review): the symbol index at the end of this page lists
   * set_weights(), get_weights(), set_feature_types(), get_feature_types()
   * and get_certainty_vector(), none of which is declared in this listing.
   * The otherwise empty numbered lines around here (123-146) may be where
   * they belong - confirm against the real header.
   */
123 
128 
133 
  /**
   * Clears the weights (NOTE(review): presumably resets m_weights and
   * m_weights_set - confirm in the implementation).
   */
135  void clear_weights();
136 
141 
146 
  /**
   * Clears the feature types (NOTE(review): presumably resets m_nominal and
   * m_types_set - confirm in the implementation).
   */
148  void clear_feature_types();
149 
150 protected:
151 
  /**
   * Virtual training hook.
   *
   * @param data training features; defaults to NULL
   * @return bool (NOTE(review): presumably true on success - confirm)
   */
155  virtual bool train_machine(CFeatures* data=NULL);
156 
157 private:
158 
  /**
   * Private training helper returning a tree node. Per the page's symbol
   * index, node_t is CTreeMachineNode<C45TreeNodeData>.
   *
   * @param data features
   * @param weights float64_t vector of weights
   * @param class_labels multiclass labels
   * @param id_values int32_t vector (NOTE(review): meaning not visible
   *        here - confirm)
   * @param level defaults to 0 (NOTE(review): presumably the depth of the
   *        node being built - confirm)
   * @return pointer to a node_t
   */
168  node_t* C45train(CFeatures* data, SGVector<float64_t> weights, CMulticlassLabels* class_labels,
169  SGVector<int32_t> id_values, int level = 0);
170 
  /**
   * Private pruning helper that takes the node to start from.
   *
   * @param feats dense float64_t features
   * @param gnd_truth multiclass labels (NOTE(review): presumably the ground
   *        truth for feats - confirm)
   * @param current node the pruning starts at
   * @param epsilon same type as the epsilon of prune_tree(); no default here
   */
178  void prune_tree_from_current_node(CDenseFeatures<float64_t>* feats, CMulticlassLabels* gnd_truth, node_t* current, float64_t epsilon);
179 
  /**
   * Computes a float64_t value for one attribute (by its name, the
   * informational gain of that attribute).
   *
   * @param attr_no index of the attribute
   * @param data features
   * @param weights float64_t vector of weights
   * @param class_labels multiclass labels
   * @return float64_t result
   */
188  float64_t informational_gain_attribute(int32_t attr_no, CFeatures* data, SGVector<float64_t> weights,
189  CMulticlassLabels* class_labels);
190 
  /**
   * Computes a float64_t value from labels and weights (by its name, an
   * entropy).
   *
   * @param labels multiclass labels
   * @param weights float64_t vector of weights
   * @return float64_t result
   */
197  float64_t entropy(CMulticlassLabels* labels, SGVector<float64_t> weights);
198 
  /**
   * Private classification helper that takes the node to start from.
   *
   * @param feats dense float64_t features to classify
   * @param current node the classification starts at
   * @param set_certainty defaults to false (NOTE(review): presumably
   *        controls whether m_certainty is filled - confirm)
   * @return pointer to the resulting multiclass labels
   */
206  CMulticlassLabels* apply_multiclass_from_current_node(CDenseFeatures<float64_t>* feats, node_t* current, bool set_certainty=false);
207 
  /** Private initialisation helper; its body is not visible here. */
209  void init();
210 
211 public:
  /**
   * Public class-wide float64_t constant, defined outside this listing
   * (NOTE(review): presumably the marker for a missing feature value -
   * confirm).
   */
213  static const float64_t MISSING;
214 
215 private:
216 
  /** Boolean vector (NOTE(review): presumably one flag per feature, true for
   *  nominal attributes - confirm). */
218  SGVector<bool> m_nominal;
219 
  /** float64_t vector of weights. */
221  SGVector<float64_t> m_weights;
222 
  /** float64_t vector of certainty values (NOTE(review): presumably filled
   *  during classification - confirm). */
226  SGVector<float64_t> m_certainty;
227 
  /** Flag (NOTE(review): presumably true once feature types are set). */
229  bool m_types_set;
230 
  /** Flag (NOTE(review): presumably true once weights are set). */
232  bool m_weights_set;
233 
234 };
235 } /* namespace shogun */
236 
237 #endif /* _C45CLASSIFIERTREE_H__ */
CTreeMachineNode< C45TreeNodeData > node_t
Definition: TreeMachine.h:52
SGVector< float64_t > get_weights() const
void set_feature_types(SGVector< bool > ft)
virtual bool train_machine(CFeatures *data=NULL)
virtual CMulticlassLabels * apply_multiclass(CFeatures *data=NULL)
Multiclass Labels for multi-class classification.
Class C45ClassifierTree implements the C4.5 algorithm for decision tree learning. The algorithm steps...
double float64_t
Definition: common.h:50
void set_weights(SGVector< float64_t > w)
static const float64_t MISSING
All classes and functions are contained in the shogun namespace
Definition: class_list.h:18
SGVector< bool > get_feature_types() const
SGVector< float64_t > get_certainty_vector() const
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual const char * get_name() const
Class TreeMachine, a base class for tree-based multiclass classifiers. This class is derived from CBa...
Definition: TreeMachine.h:48
void prune_tree(CDenseFeatures< float64_t > *validation_data, CMulticlassLabels *validation_labels, float64_t epsilon=0.f)

SHOGUN Machine Learning Toolbox - Documentation