SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
GUIPreprocessor.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/ui/SGInterface.h>
14 
15 #include <shogun/lib/config.h>
16 #include <shogun/io/SGIO.h>
17 #include <shogun/lib/config.h>
33 
34 #include <string.h>
35 #include <stdio.h>
36 
37 using namespace shogun;
38 
40 : CSGObject(), ui(ui_)
41 {
42  preprocs=new CList(true);
43 }
44 
46 {
48 }
49 
51 {
52  CPreprocessor* preproc=new CPruneVarSubMean(divide_by_std);
53 
54  if (preproc)
55  SG_INFO("PRUNEVARSUBMEAN created (%p), divide_by_std %d", preproc, divide_by_std)
56  else
57  SG_ERROR("Could not create preproc PRUNEVARSUBMEAN, divide_by_std %d", divide_by_std)
58 
59  return preproc;
60 }
61 
63 {
64  CPreprocessor* preproc=new CPCA(do_whitening, THRESHOLD, threshold);
65 
66  if (preproc)
67  SG_INFO("PCA created (%p), do_whitening %i threshold %e", preproc, do_whitening, threshold)
68  else
69  SG_ERROR("Could not create preproc PCA, do_whitening %i threshold %e", do_whitening, threshold)
70 
71  return preproc;
72 }
73 
75 {
76  CPreprocessor* preproc=NULL;
77 
78  switch (type)
79  {
80  case P_NORMONE:
81  preproc=new CNormOne(); break;
82  case P_LOGPLUSONE:
83  preproc=new CLogPlusOne(); break;
84  case P_SORTWORDSTRING:
85  preproc=new CSortWordString(); break;
86  case P_SORTULONGSTRING:
87  preproc=new CSortUlongString(); break;
89  preproc=new CDecompressString<char>(LZO); break;
90  default:
91  SG_ERROR("Unknown Preprocessor type %d\n", type)
92  }
93 
94  if (preproc)
95  SG_INFO("Preproc of type %d created (%p).\n", type, preproc)
96  else
97  SG_ERROR("Could not create preproc of type %d.\n", type)
98 
99  return preproc;
100 }
101 
103 {
104  return preprocs->append_element_at_listend(preproc);
105 }
106 
108 {
110  preprocs=new CList(true);
111  return (preprocs!=NULL);
112 }
113 
115 {
116  SG_INFO("Deleting preproc %i/(%i).\n", preprocs->get_num_elements()-1, preprocs->get_num_elements())
117 
118  CSGObject* preproc=preprocs->delete_element();
119  SG_UNREF(preproc);
120 
121  return (preproc!=NULL);
122 }
123 
/**
 * Apply the queued preprocessors to the train or test features held by the UI.
 *
 * The target is selected by prefix: a string starting with "TRAIN" (first 5
 * characters compared) selects the train features, one starting with "TEST"
 * (first 4 characters compared) selects the test features. Any other value is
 * reported through SG_ERROR.
 *
 * @param target   name of the feature set to preprocess ("TRAIN..." or "TEST...")
 * @param do_force forwarded unchanged to preprocess_features()
 * @return true if one of the preprocessing paths completed; in that case
 *         clean_preproc() is called before returning
 *
 * NOTE(review): several `if (!x) SG_ERROR(...)` checks below are followed by an
 * unconditional dereference of x, so this code relies on SG_ERROR not returning
 * control to the caller - confirm against SGIO.h.
 */
124 bool CGUIPreprocessor::attach_preproc(char* target, bool do_force)
125 {
126  bool result=false;
127 
128  if (strncmp(target, "TRAIN", 5)==0)
129  {
130  CFeatures* f=ui->ui_features->get_train_features();
131  if (!f)
132  SG_ERROR("No train features assigned!\n")
133 
// For combined train features only the last sub-feature object is preprocessed.
// NOTE(review): whether get_last_feature_obj() hands back an extra reference
// that must be released is not visible here - confirm ownership.
134  if (f->get_feature_class()==C_COMBINED)
135  f=((CCombinedFeatures*)f)->get_last_feature_obj();
136 
137  preprocess_features(f, NULL, do_force);
138  ui->ui_features->invalidate_train();
139  result=true;
140  }
141  else if (strncmp(target, "TEST", 4)==0)
142  {
// Test preprocessing needs both feature sets: the train features are passed
// to preprocess_features() together with the test features.
143  CFeatures* f_test=ui->ui_features->get_test_features();
144  if (!f_test)
145  SG_ERROR("No test features assigned!\n")
146 
147  CFeatures* f_train=ui->ui_features->get_train_features();
148  if (!f_train)
149  SG_ERROR("No train features assigned!\n")
150 
151  EFeatureClass fclass_train=f_train->get_feature_class();
152  EFeatureClass fclass_test=f_test->get_feature_class();
153 
154  if (fclass_train==fclass_test)
155  {
156  if (fclass_train==C_COMBINED)
157  {
158  if (((CCombinedFeatures*) f_train)->check_feature_obj_compatibility((CCombinedFeatures*) f_test))
159  {
160 
161  int32_t num_combined=((CCombinedFeatures*) f_test)->get_num_feature_obj();
162  ASSERT(((CCombinedFeatures*) f_train)->get_num_feature_obj()==num_combined)
163 
164  if (!num_combined)
165  SG_ERROR("One of the combined features has no sub-features ?!\n")
166 
167  // preprocess every train/test sub-feature pair in index order
168  SG_INFO("BEGIN PREPROCESSING COMBINED FEATURES (%d sub-featureobjects).\n", num_combined)
169  index_t f_idx = 0;
170  for (; f_idx<num_combined; f_idx++)
171  {
// NOTE(review): the objects returned by get_feature_obj() are never passed to
// SG_UNREF in this loop - confirm whether the getter adds a reference.
172  CFeatures* te_feat=((CCombinedFeatures*) f_test)->get_feature_obj(f_idx);
173  CFeatures* tr_feat=((CCombinedFeatures*) f_train)->get_feature_obj(f_idx);
174 
// A missing sub-feature stops the loop early; the ASSERT after the loop then fails.
175  if (!(te_feat && tr_feat))
176  break;
177 
178  // and preprocess using that one
179  SG_INFO("TRAIN ")
180  tr_feat->list_feature_obj();
181  SG_INFO("TEST ")
182  te_feat->list_feature_obj();
183  preprocess_features(tr_feat, te_feat, do_force);
184  }
185  ASSERT(f_idx==num_combined)
// NOTE(review): unlike the non-combined branch below, this path does not call
// ui->ui_features->invalidate_test() - confirm that this is intentional.
186  result=true;
187  SG_INFO("END PREPROCESSING COMBINED FEATURES\n")
188  }
189  else
190  SG_ERROR("combined features not compatible\n")
191  }
192  else
193  {
194  preprocess_features(f_train, f_test, do_force);
195  ui->ui_features->invalidate_test();
196  result=true;
197  }
198  }
199  else
200  SG_ERROR("Features not compatible.\n")
201  }
202  else
203  SG_ERROR("Features not correctly assigned!\n")
204 
205 
// The queued preprocessor list is reset only after a successful run.
206  if (result)
207  clean_preproc();
208 
209  return result;
210 }
211 
/**
 * Attach preprocessors to feature objects and apply them.
 *
 * Two modes, chosen by whether testfeat is given:
 *  - testfeat != NULL: the preprocessors already attached to trainfeat are
 *    initialised on trainfeat, added to testfeat, and then applied to testfeat.
 *  - testfeat == NULL: the preprocessors queued in `preprocs` are initialised
 *    on trainfeat, added to it, and applied to it one after another.
 *
 * @param trainfeat features used to initialise every preprocessor; required
 * @param testfeat  optional test features that receive trainfeat's preprocessors
 * @param force     forwarded unchanged to preproc_all_features()
 * @return true whenever trainfeat is non-NULL and the preprocessor-count check
 *         passes; the results of preproc_all_features() are not inspected.
 *         false is returned on the error paths, if SG_ERROR returns at all.
 */
212 bool CGUIPreprocessor::preprocess_features(CFeatures* trainfeat, CFeatures* testfeat, bool force)
213 {
214  if (trainfeat)
215  {
216  if (testfeat)
217  {
218  // if we don't have a preproc for trainfeatures we
219  // don't need a preproc for test features
220  SG_DEBUG("%d preprocessors attached to train features %d to test features\n", trainfeat->get_num_preprocessors(), testfeat->get_num_preprocessors())
221 
222  if (trainfeat->get_num_preprocessors() < testfeat->get_num_preprocessors())
223  {
224  SG_ERROR("more preprocessors attached to test features than to train features\n")
225  return false;
226  }
227 
// Copy preprocessors over only when train has at least one and strictly more than test.
228  if (trainfeat->get_num_preprocessors() && (trainfeat->get_num_preprocessors() > testfeat->get_num_preprocessors()))
229  {
// NOTE(review): the loop starts at index 0, so every train preprocessor is
// added even if testfeat already holds some of them - confirm that
// add_preprocessor() tolerates this or that testfeat is always empty here.
230  for (int32_t i=0; i<trainfeat->get_num_preprocessors(); i++)
231  {
232  CPreprocessor* preproc = trainfeat->get_preprocessor(i);
233  preproc->init(trainfeat);
234  testfeat->add_preprocessor(preproc);
// Releases the local handle obtained from get_preprocessor().
235  SG_UNREF(preproc);
236  }
237 
238  preproc_all_features(testfeat, force);
239  }
240  }
241  else
242  {
// NOTE(review): listing line 243 is missing from this extract. It presumably
// declares `preproc` and initialises it from the first element of `preprocs`
// (CList::get_first_element()) - confirm against the real source file.
244 
245  if (preproc)
246  {
247  preproc->init(trainfeat);
248  trainfeat->add_preprocessor(preproc);
249 
250  preproc_all_features(trainfeat, force);
251  SG_UNREF(preproc);
252  }
253 
// Walk the rest of the queue; the features are re-processed after each
// newly attached preprocessor.
254  while ( (preproc = (CPreprocessor*) preprocs->get_next_element()) !=NULL )
255  {
256  preproc->init(trainfeat);
257  trainfeat->add_preprocessor(preproc);
258  SG_UNREF(preproc);
259 
260  preproc_all_features(trainfeat, force);
261  }
262  }
263 
264  return true;
265  }
266  else
267  SG_ERROR("no features for preprocessing available!\n")
268 
269  return false;
270 }
271 
273 {
274  switch (f->get_feature_class())
275  {
276  case C_DENSE:
277  switch (f->get_feature_type())
278  {
279  case F_DREAL:
280  return ((CDenseFeatures<float64_t>*) f)->apply_preprocessor(force);
281  case F_SHORT:
282  return ((CDenseFeatures<int16_t>*) f)->apply_preprocessor(force);
283  case F_WORD:
284  return ((CDenseFeatures<uint16_t>*) f)->apply_preprocessor(force);
285  case F_CHAR:
286  return ((CDenseFeatures<char>*) f)->apply_preprocessor(force);
287  case F_BYTE:
288  return ((CDenseFeatures<uint8_t>*) f)->apply_preprocessor(force);
289  default:
291  }
292  break;
293  case C_STRING:
294  switch (f->get_feature_type())
295  {
296  case F_WORD:
297  return ((CStringFeatures<uint16_t>*) f)->apply_preprocessor(force);
298  case F_ULONG:
299  return ((CStringFeatures<uint64_t>*) f)->apply_preprocessor(force);
300  default:
302  }
303  break;
304  case C_SPARSE:
305  switch (f->get_feature_type())
306  {
307  case F_DREAL:
308  return ((CSparseFeatures<float64_t>*) f)->apply_preprocessor(force);
309  default:
311  };
312  break;
313  case C_COMBINED:
314  SG_ERROR("Combined feature objects cannot be preprocessed. Only its sub-feature objects!\n")
315  break;
316  default:
318  }
319 
320  return false;
321 }
#define SG_INFO(...)
Definition: SGIO.h:118
virtual bool init(CFeatures *features)=0
CSGObject * get_next_element()
Definition: List.h:185
EPreprocessorType
Definition: Preprocessor.h:32
int32_t index_t
Definition: common.h:62
#define SG_ERROR(...)
Definition: SGIO.h:129
#define SG_NOTIMPLEMENTED
Definition: SGIO.h:139
CPreprocessor * get_preprocessor(int32_t num) const
Definition: Features.cpp:93
Preprocessor SortUlongString, sorts the individual strings in ascending order.
Preprocessor LogPlusOne does what the name says, it adds one to a dense real valued vector and takes ...
Definition: LogPlusOne.h:34
CSGObject * delete_element()
Definition: List.h:502
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
Preprocessor PruneVarSubMean will subtract the mean and remove features that have zero variance...
CPreprocessor * create_prunevarsubmean(bool divide_by_std=false)
int32_t get_num_preprocessors() const
Definition: Features.cpp:155
CSGObject * get_first_element()
Definition: List.h:151
void list_feature_obj() const
Definition: Features.cpp:171
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
CPreprocessor * create_generic(EPreprocessorType type)
double float64_t
Definition: common.h:50
bool preproc_all_features(CFeatures *f, bool force)
virtual EFeatureClass get_feature_class() const =0
int32_t get_num_elements()
Definition: List.h:145
bool preprocess_features(CFeatures *trainfeat, CFeatures *testfeat, bool force)
virtual void add_preprocessor(CPreprocessor *p)
Definition: Features.cpp:85
#define SG_UNREF(x)
Definition: SGObject.h:52
#define SG_DEBUG(...)
Definition: SGIO.h:107
Preprocessor NormOne, normalizes vectors to have norm 1.
Definition: NormOne.h:34
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
bool add_preproc(CPreprocessor *preproc)
The class Features is the base class of all feature objects.
Definition: Features.h:68
Preprocessor PCA performs principal component analysis on input feature vectors/matrices. When the init method in PCA is called with proper feature matrix X (with say N number of vectors and D feature dimension), a transformation matrix is computed and stored internally. This transformation matrix is then used to transform all D-dimensional feature vectors or feature matrices (with D feature dimensions) supplied via apply_to_feature_matrix or apply_to_feature_vector methods. This transformation outputs the T-Dimensional approximation of all these input vectors and matrices (where T<=min(D,N)). The transformation matrix is essentially a DxT matrix, the columns of which correspond to the eigenvectors of the covariance matrix (XX') having top T eigenvalues.
Definition: PCA.h:112
bool attach_preproc(char *target, bool do_force=false)
Class Preprocessor defines a preprocessor interface.
Definition: Preprocessor.h:75
Preprocessor that decompresses compressed strings.
Preprocessor SortWordString, sorts the individual strings in ascending order.
The class CombinedFeatures is used to combine a number of feature objects into a single CombinedFe...
bool append_element_at_listend(CSGObject *data)
Definition: List.h:386
CPreprocessor * create_pca(bool do_whitening, float64_t threshold)
virtual EFeatureType get_feature_type() const =0
Class List implements a doubly connected list for low-level-objects.
Definition: List.h:84

SHOGUN Machine Learning Toolbox - Documentation