SHOGUN 6.0.0
QDA.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Fernando José Iglesias García
 * Copyright (C) 2012 Fernando José Iglesias García
 */

#include <shogun/lib/common.h>

#include <shogun/multiclass/QDA.h>
#include <shogun/machine/NativeMulticlassMachine.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/Labels.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>

using namespace shogun;
using namespace Eigen;

CQDA::CQDA() : CNativeMulticlassMachine(), m_num_classes(0), m_dim(0)
{
    init();
}

CQDA::CQDA(float64_t tolerance, bool store_covs)
: CNativeMulticlassMachine(), m_num_classes(0), m_dim(0)
{
    init();
    m_tolerance = tolerance;
    m_store_covs = store_covs;
}

CQDA::CQDA(CDenseFeatures<float64_t>* traindat, CLabels* trainlab)
: CNativeMulticlassMachine(), m_num_classes(0), m_dim(0)
{
    init();
    set_features(traindat);
    set_labels(trainlab);
}

CQDA::CQDA(CDenseFeatures<float64_t>* traindat, CLabels* trainlab, float64_t tolerance)
: CNativeMulticlassMachine(), m_num_classes(0), m_dim(0)
{
    init();
    set_features(traindat);
    set_labels(trainlab);
    m_tolerance = tolerance;
}

CQDA::CQDA(CDenseFeatures<float64_t>* traindat, CLabels* trainlab, bool store_covs)
: CNativeMulticlassMachine(), m_num_classes(0), m_dim(0)
{
    init();
    set_features(traindat);
    set_labels(trainlab);
    m_store_covs = store_covs;
}

CQDA::CQDA(CDenseFeatures<float64_t>* traindat, CLabels* trainlab, float64_t tolerance, bool store_covs)
: CNativeMulticlassMachine(), m_num_classes(0), m_dim(0)
{
    init();
    set_features(traindat);
    set_labels(trainlab);
    m_tolerance = tolerance;
    m_store_covs = store_covs;
}

CQDA::~CQDA()
{
    SG_UNREF(m_features);

    cleanup();
}

void CQDA::init()
{
    m_tolerance = 1e-4;
    m_store_covs = false;
    SG_ADD(&m_tolerance, "m_tolerance", "Tolerance member.", MS_AVAILABLE);
    SG_ADD(&m_store_covs, "m_store_covs", "Store covariances member", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**) &m_features, "m_features", "Feature object.", MS_NOT_AVAILABLE);
    SG_ADD(&m_means, "m_means", "Mean vectors list", MS_NOT_AVAILABLE);
    SG_ADD(&m_slog, "m_slog", "Vector used in classification", MS_NOT_AVAILABLE);

    //TODO include SGNDArray objects for serialization

    m_features = NULL;
}

void CQDA::cleanup()
{
    m_means = SGMatrix<float64_t>();

    m_num_classes = 0;
}

CMulticlassLabels* CQDA::apply_multiclass(CFeatures* data)
{
    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")

        set_features((CDotFeatures*) data);
    }

    if (!m_features)
        return NULL;

    int32_t num_vecs = m_features->get_num_vectors();
    ASSERT(num_vecs > 0)
    ASSERT(m_dim == m_features->get_dim_feature_space())

    CDenseFeatures<float64_t>* rf = (CDenseFeatures<float64_t>*) m_features;

    MatrixXd X(num_vecs, m_dim);
    MatrixXd A(num_vecs, m_dim);
    VectorXd norm2(num_vecs*m_num_classes);
    norm2.setZero();

    SGVector<float64_t> vec;
    for (int k = 0; k < m_num_classes; k++)
    {
        // X = features - means
        for (int i = 0; i < num_vecs; i++)
        {
            vec = rf->get_feature_vector(i);
            ASSERT(vec.vector)

            Map< VectorXd > Evec(vec, vec.vlen);
            Map< VectorXd > Em_means_col(m_means.get_column_vector(k), m_dim);

            X.row(i) = Evec - Em_means_col;

            rf->free_feature_vector(vec, i);
        }

        // A = X * M_k, where M_k whitens with respect to the k-th class covariance
        Map< MatrixXd > Em_M(m_M.get_matrix(k), m_dim, m_dim);
        A = X*Em_M;

        // squared Mahalanobis distance of each vector to the k-th class mean
        for (int i = 0; i < num_vecs; i++)
            norm2(i + k*num_vecs) = A.row(i).array().square().sum();
    }

    // add the log-determinant term and scale to obtain the QDA discriminant
    for (int i = 0; i < num_vecs; i++)
        for (int k = 0; k < m_num_classes; k++)
        {
            norm2[i + k*num_vecs] += m_slog[k];
            norm2[i + k*num_vecs] *= -0.5;
        }

    CMulticlassLabels* out = new CMulticlassLabels(num_vecs);

    for (int i = 0; i < num_vecs; i++)
        out->set_label(i, CMath::arg_max(norm2.data()+i, num_vecs, m_num_classes));

    return out;
}
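
/* Note on the decision rule above (explanatory comment, not part of the original
 * source): for every class k, apply_multiclass() evaluates
 *
 *   delta_k(x) = -0.5 * [ (x - mu_k)^T Sigma_k^{-1} (x - mu_k) + log|Sigma_k| ]
 *
 * and predicts argmax_k delta_k(x). The Mahalanobis term is computed as
 * ||(x - mu_k)^T M_k||^2, where the matrix M_k stored in m_M satisfies
 * M_k M_k^T = Sigma_k^{-1}, and m_slog[k] holds log|Sigma_k|. Class priors are
 * not added, i.e. all classes are implicitly treated as equally likely.
 */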

bool CQDA::train_machine(CFeatures* data)
{
    if (!m_labels)
        SG_ERROR("No labels allocated in QDA training\n")

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")

        set_features((CDotFeatures*) data);
    }

    if (!m_features)
        SG_ERROR("No features allocated in QDA training\n")

    SGVector< int32_t > train_labels = ((CMulticlassLabels*) m_labels)->get_int_labels();

    if (!train_labels.vector)
        SG_ERROR("No train_labels allocated in QDA training\n")

    cleanup();

    m_num_classes = ((CMulticlassLabels*) m_labels)->get_num_classes();
    m_dim = m_features->get_dim_feature_space();
    int32_t num_vec = m_features->get_num_vectors();

    if (num_vec != train_labels.vlen)
        SG_ERROR("Dimension mismatch between features and labels in QDA training\n")

    // class_idxs lists, per class, the indices of the examples of that class;
    // class_nums counts the number of examples of each class
    int32_t* class_idxs = SG_MALLOC(int32_t, num_vec*m_num_classes);
    int32_t* class_nums = SG_MALLOC(int32_t, m_num_classes);
    memset(class_nums, 0, m_num_classes*sizeof(int32_t));
    int32_t class_idx;

    for (int i = 0; i < train_labels.vlen; i++)
    {
        class_idx = train_labels.vector[i];

        if (class_idx < 0 || class_idx >= m_num_classes)
        {
            SG_ERROR("Found label out of {0, 1, 2, ..., num_classes-1}\n")
            return false;
        }
        else
        {
            class_idxs[ class_idx*num_vec + class_nums[class_idx]++ ] = i;
        }
    }

    for (int i = 0; i < m_num_classes; i++)
    {
        if (class_nums[i] <= 0)
        {
            SG_ERROR("Found a class with no training examples\n")
            return false;
        }
    }

    if (m_store_covs)
    {
        // cov_dims will be freed in m_covs.destroy_ndarray()
        index_t* cov_dims = SG_MALLOC(index_t, 3);
        cov_dims[0] = m_dim;
        cov_dims[1] = m_dim;
        cov_dims[2] = m_num_classes;
        m_covs = SGNDArray< float64_t >(cov_dims, 3);
    }

    m_means = SGMatrix< float64_t >(m_dim, m_num_classes, true);
    SGMatrix< float64_t > scalings = SGMatrix< float64_t >(m_dim, m_num_classes);

    // rot_dims will be freed in rotations.destroy_ndarray()
    index_t* rot_dims = SG_MALLOC(index_t, 3);
    rot_dims[0] = m_dim;
    rot_dims[1] = m_dim;
    rot_dims[2] = m_num_classes;
    SGNDArray< float64_t > rotations = SGNDArray< float64_t >(rot_dims, 3);

    CDenseFeatures< float64_t >* rf = (CDenseFeatures< float64_t >*) m_features;

    m_means.zero();

    SGVector< float64_t > vec;
    for (int k = 0; k < m_num_classes; k++)
    {
        MatrixXd buffer(class_nums[k], m_dim);
        Map< VectorXd > Em_means(m_means.get_column_vector(k), m_dim);
        for (int i = 0; i < class_nums[k]; i++)
        {
            vec = rf->get_feature_vector(class_idxs[k*num_vec + i]);
            ASSERT(vec.vector)

            Map< VectorXd > Evec(vec, vec.vlen);
            Em_means += Evec;
            buffer.row(i) = Evec;

            rf->free_feature_vector(vec, class_idxs[k*num_vec + i]);
        }

        Em_means /= class_nums[k];

        // center the class examples around the class mean
        for (int i = 0; i < class_nums[k]; i++)
            buffer.row(i) -= Em_means;

        // SVD of the centered class data: buffer = U*S*V^T
        float64_t* col = scalings.get_column_vector(k);
        float64_t* rot_mat = rotations.get_matrix(k);

        Eigen::JacobiSVD<MatrixXd> eSvd;
        eSvd.compute(buffer, Eigen::ComputeFullV);
        sg_memcpy(col, eSvd.singularValues().data(), m_dim*sizeof(float64_t));
        sg_memcpy(rot_mat, eSvd.matrixV().data(), m_dim*m_dim*sizeof(float64_t));

        // turn singular values into covariance eigenvalues: s_j^2 / (n_k - 1)
        SGVector<float64_t>::vector_multiply(col, col, col, m_dim);
        SGVector<float64_t>::scale_vector(1.0/(class_nums[k]-1), col, m_dim);

        if (m_store_covs)
        {
            // reconstruct the class covariance matrix as V * diag(scalings) * V^T
            MatrixXd rotE = Map<MatrixXd>(rot_mat, m_dim, m_dim);
            MatrixXd MEig = rotE;
            for (int i = 0; i < m_dim; i++)
                for (int j = 0; j < m_dim; j++)
                    MEig(i,j) *= scalings[k*m_dim + j];
            MatrixXd resE = MEig * rotE.transpose();
            sg_memcpy(m_covs.get_matrix(k), resE.data(), m_dim*m_dim*sizeof(float64_t));
        }
    }
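
    /* Explanatory note (added): after the loop above, scalings(:,k) holds the
     * eigenvalues of the k-th class covariance (squared singular values of the
     * centered data divided by class_nums[k]-1) and rotations(:,:,k) holds the
     * corresponding eigenvectors V_k, so Sigma_k = V_k * diag(scalings(:,k)) * V_k^T.
     * The block below builds the whitening matrices M_k = V_k * diag(scalings^{-1/2})
     * used by apply_multiclass() and accumulates log|Sigma_k| in m_slog[k].
     */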

    /* Computation of terms required for classification */
    SGVector< float32_t > sinvsqrt(m_dim);

    // M_dims will be freed in m_M.destroy_ndarray()
    index_t* M_dims = SG_MALLOC(index_t, 3);
    M_dims[0] = m_dim;
    M_dims[1] = m_dim;
    M_dims[2] = m_num_classes;
    m_M = SGNDArray< float64_t >(M_dims, 3);

    m_slog = SGVector< float32_t >(m_num_classes);
    m_slog.zero();

    index_t idx = 0;
    for (int k = 0; k < m_num_classes; k++)
    {
        for (int j = 0; j < m_dim; j++)
        {
            sinvsqrt[j] = 1.0 / CMath::sqrt(scalings[k*m_dim + j]);
            m_slog[k] += CMath::log(scalings[k*m_dim + j]);
        }

        for (int i = 0; i < m_dim; i++)
            for (int j = 0; j < m_dim; j++)
            {
                idx = k*m_dim*m_dim + i + j*m_dim;
                m_M[idx] = rotations[idx] * sinvsqrt[j];
            }
    }

    SG_FREE(class_idxs);
    SG_FREE(class_nums);
    return true;
}
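
A minimal usage sketch, not part of QDA.cpp: it wraps a toy feature matrix and label
vector in CDenseFeatures/CMulticlassLabels, trains CQDA with an explicit tolerance and
covariance storage, and applies it back to the training data. The toy values and the
main() scaffold are illustrative assumptions; error handling and data loading are omitted.

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/QDA.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // toy data: three 2-D points per class (hypothetical values, for illustration only)
    SGMatrix<float64_t> feats_mat(2, 6);
    SGVector<float64_t> labs_vec(6);
    feats_mat(0,0)=0.0; feats_mat(1,0)=0.0;  labs_vec[0]=0;
    feats_mat(0,1)=0.3; feats_mat(1,1)=0.1;  labs_vec[1]=0;
    feats_mat(0,2)=0.1; feats_mat(1,2)=0.4;  labs_vec[2]=0;
    feats_mat(0,3)=2.0; feats_mat(1,3)=2.0;  labs_vec[3]=1;
    feats_mat(0,4)=2.2; feats_mat(1,4)=1.8;  labs_vec[4]=1;
    feats_mat(0,5)=1.9; feats_mat(1,5)=2.3;  labs_vec[5]=1;

    CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(feats_mat);
    CMulticlassLabels* labels = new CMulticlassLabels(labs_vec);

    // tolerance 1e-4, store the per-class covariance matrices
    CQDA* qda = new CQDA(features, labels, 1e-4, true);
    qda->train();

    // apply to the training features and read back the first prediction
    CMulticlassLabels* pred = qda->apply_multiclass();
    SG_SPRINT("predicted label of first vector: %f\n", pred->get_label(0));

    SG_UNREF(pred);
    SG_UNREF(qda);

    exit_shogun();
    return 0;
}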