SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
C45ClassifierTree.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Parijat Mazumdar
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 
32 #ifndef _C45CLASSIFIERTREE_H__
33 #define _C45CLASSIFIERTREE_H__
34 
35 #include <shogun/lib/config.h>
36 
40 
41 namespace shogun
42 {
43 
/** @brief C4.5 classifier tree, declared as a CTreeMachine specialised on
 * C45TreeNodeData.
 *
 * Only declarations are visible here (the sole inline body is get_name()),
 * so notes about behaviour are hedged wherever the implementation would be
 * needed to confirm them.
 *
 * NOTE(review): the listing numbers embedded at the start of each line skip
 * values (e.g. 77 -> 80, 114 -> 123 -> 128 -> 133 -> 135, 136 -> 141 -> 146).
 * Comments and possibly declarations (such as a constructor or weight /
 * feature-type accessors) may be missing from this view - verify against the
 * original header before relying on this as the complete interface.
 */
75 class CC45ClassifierTree : public CTreeMachine<C45TreeNodeData>
76 {
77 public:
80 
    /** Virtual destructor. */
82  virtual ~CC45ClassifierTree();
83 
    /** @return the constant string "C45ClassifierTree" */
87  virtual const char* get_name() const { return "C45ClassifierTree"; }
88 
    /** Produce multiclass labels for the given features.
     *
     * @param data features to label; defaults to NULL
     *        (NOTE(review): meaning of the NULL default is not visible here - confirm)
     * @return pointer to a CMulticlassLabels object
     *         (NOTE(review): ownership of the returned object is not visible here - confirm)
     */
93  virtual CMulticlassLabels* apply_multiclass(CFeatures* data=NULL);
94 
    /** Prune the tree using a validation set.
     *
     * @param validation_data dense float64 features used for validation
     * @param validation_labels multiclass labels matching validation_data
     * @param epsilon tolerance parameter, defaults to 0
     *        (NOTE(review): presumably the allowed accuracy loss when
     *        collapsing a subtree - confirm against the implementation)
     */
113  void prune_tree(CDenseFeatures<float64_t>* validation_data, CMulticlassLabels* validation_labels, float64_t epsilon=0.f);
114 
123 
128 
133 
    /** Clear the weights.
     * NOTE(review): presumably resets m_weights and m_weights_set - confirm.
     */
135  void clear_weights();
136 
141 
146 
    /** Clear the feature types.
     * NOTE(review): presumably resets m_nominal and m_types_set - confirm.
     */
148  void clear_feature_types();
149 
150 protected:
151 
    /** Train the machine.
     *
     * @param data training features; defaults to NULL
     * @return bool status (NOTE(review): presumably true on success - confirm)
     */
155  virtual bool train_machine(CFeatures* data=NULL);
156 
157 private:
158 
    /** Build a (sub)tree and return its root node.
     *
     * @param data features to train on
     * @param weights per-example weights (passed by value as SGVector)
     * @param class_labels labels of the examples
     * @param id_values integer ids
     *        (NOTE(review): presumably feature/attribute indices still
     *        available for splitting - confirm)
     * @param level integer level, defaults to 0
     *        (NOTE(review): presumably the current tree depth - confirm)
     * @return pointer to the node created for this call
     */
168  node_t* C45train(CFeatures* data, SGVector<float64_t> weights, CMulticlassLabels* class_labels,
169  SGVector<int32_t> id_values, int level = 0);
170 
    /** Pruning helper operating on the subtree at a given node.
     *
     * @param feats dense float64 features
     * @param gnd_truth ground-truth multiclass labels for feats
     * @param current node at which this call operates
     * @param epsilon tolerance parameter (see prune_tree)
     */
178  void prune_tree_from_current_node(CDenseFeatures<float64_t>* feats, CMulticlassLabels* gnd_truth, node_t* current, float64_t epsilon);
179 
    /** Informational gain for one attribute.
     *
     * @param attr_no index of the attribute
     * @param data features
     * @param weights per-example weights
     * @param class_labels labels of the examples
     * @return gain value as float64_t
     */
188  float64_t informational_gain_attribute(int32_t attr_no, CFeatures* data, SGVector<float64_t> weights,
189  CMulticlassLabels* class_labels);
190 
    /** Entropy of a set of labels given weights.
     *
     * @param labels multiclass labels
     * @param weights weights (NOTE(review): presumably one per label - confirm)
     * @return entropy value as float64_t
     */
197  float64_t entropy(CMulticlassLabels* labels, SGVector<float64_t> weights);
198 
    /** Labelling helper operating from a given node.
     *
     * @param feats dense float64 features to label
     * @param current node from which this call operates
     * @param set_certainty flag, defaults to false
     *        (NOTE(review): presumably controls whether m_certainty is
     *        filled in - confirm)
     * @return pointer to a CMulticlassLabels object
     */
206  CMulticlassLabels* apply_multiclass_from_current_node(CDenseFeatures<float64_t>* feats, node_t* current, bool set_certainty=false);
207 
    /** Initialisation helper.
     * NOTE(review): presumably sets member defaults / registers parameters - confirm.
     */
209  void init();
210 
211 public:
    /** Static float64_t constant; its value is defined outside this header.
     * NOTE(review): presumably the sentinel marking a missing attribute value - confirm.
     */
213  static const float64_t MISSING;
214 
215 private:
216 
    /** Boolean vector (NOTE(review): presumably one flag per feature, true if nominal - confirm). */
218  SGVector<bool> m_nominal;
219 
    /** Vector of float64 weights (NOTE(review): presumably per training example - confirm). */
221  SGVector<float64_t> m_weights;
222 
    /** Vector of float64 certainty values
     * (NOTE(review): presumably filled by apply_multiclass_from_current_node
     * when set_certainty is true - confirm).
     */
226  SGVector<float64_t> m_certainty;
227 
    /** Flag (NOTE(review): presumably true once feature types have been set - confirm). */
229  bool m_types_set;
230 
    /** Flag (NOTE(review): presumably true once weights have been set - confirm). */
232  bool m_weights_set;
233 
234 };
235 } /* namespace shogun */
236 
237 #endif /* _C45CLASSIFIERTREE_H__ */

SHOGUN Machine Learning Toolbox - Documentation