SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
QDA.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Fernando José Iglesias García
8  * Copyright (C) 2012 Fernando José Iglesias García
9  */
10 
11 #include <shogun/lib/common.h>
12 
13 
14 #include <shogun/multiclass/QDA.h>
17 #include <shogun/labels/Labels.h>
20 
22 
23 using namespace shogun;
24 using namespace Eigen;
25 
26 CQDA::CQDA(float64_t tolerance, bool store_covs)
27 : CNativeMulticlassMachine(), m_tolerance(tolerance),
28  m_store_covs(store_covs), m_num_classes(0), m_dim(0)
29 {
30  init();
31 }
32 
33 CQDA::CQDA(CDenseFeatures<float64_t>* traindat, CLabels* trainlab, float64_t tolerance, bool store_covs)
34 : CNativeMulticlassMachine(), m_tolerance(tolerance), m_store_covs(store_covs), m_num_classes(0), m_dim(0)
35 {
36  init();
37  set_features(traindat);
38  set_labels(trainlab);
39 }
40 
42 {
43  SG_UNREF(m_features);
44 
45  cleanup();
46 }
47 
/** Register the model parameters with the SGObject parameter framework
 *  (used for serialization and, where flagged MS_AVAILABLE, model
 *  selection) and initialize the feature pointer.
 */
void CQDA::init()
{
	SG_ADD(&m_tolerance, "m_tolerance", "Tolerance member.", MS_AVAILABLE);
	SG_ADD(&m_store_covs, "m_store_covs", "Store covariances member", MS_NOT_AVAILABLE);
	// SG_ADD takes the address of the member, so registering before the
	// pointer is assigned below is intentional.
	SG_ADD((CSGObject**) &m_features, "m_features", "Feature object.", MS_NOT_AVAILABLE);
	SG_ADD(&m_means, "m_means", "Mean vectors list", MS_NOT_AVAILABLE);
	SG_ADD(&m_slog, "m_slog", "Vector used in classification", MS_NOT_AVAILABLE);

	//TODO include SGNDArray objects for serialization

	m_features = NULL;
}
60 
61 void CQDA::cleanup()
62 {
63  m_means=SGMatrix<float64_t>();
64 
65  m_num_classes = 0;
66 }
67 
69 {
70  if (data)
71  {
72  if (!data->has_property(FP_DOT))
73  SG_ERROR("Specified features are not of type CDotFeatures\n")
74 
75  set_features((CDotFeatures*) data);
76  }
77 
78  if ( !m_features )
79  return NULL;
80 
81  int32_t num_vecs = m_features->get_num_vectors();
82  ASSERT(num_vecs > 0)
83  ASSERT( m_dim == m_features->get_dim_feature_space() )
84 
86 
87  MatrixXd X(num_vecs, m_dim);
88  MatrixXd A(num_vecs, m_dim);
89  VectorXd norm2(num_vecs*m_num_classes);
90  norm2.setZero();
91 
92  int32_t vlen;
93  bool vfree;
94  float64_t* vec;
95  for (int k = 0; k < m_num_classes; k++)
96  {
97  // X = features - means
98  for (int i = 0; i < num_vecs; i++)
99  {
100  vec = rf->get_feature_vector(i, vlen, vfree);
101  ASSERT(vec)
102 
103  Map< VectorXd > Evec(vec,vlen);
104  Map< VectorXd > Em_means_col(m_means.get_column_vector(k), m_dim);
105 
106  X.row(i) = Evec - Em_means_col;
107 
108  rf->free_feature_vector(vec, i, vfree);
109  }
110 
111  Map< MatrixXd > Em_M(m_M.get_matrix(k), m_dim, m_dim);
112  A = X*Em_M;
113 
114  for (int i = 0; i < num_vecs; i++)
115  norm2(i + k*num_vecs) = A.row(i).array().square().sum();
116 
117 #ifdef DEBUG_QDA
118  SG_PRINT("\n>>> Displaying A ...\n")
119  SGMatrix< float64_t >::display_matrix(A.data(), num_vecs, m_dim);
120 #endif
121  }
122 
123  for (int i = 0; i < num_vecs; i++)
124  for (int k = 0; k < m_num_classes; k++)
125  {
126  norm2[i + k*num_vecs] += m_slog[k];
127  norm2[i + k*num_vecs] *= -0.5;
128  }
129 
130 #ifdef DEBUG_QDA
131  SG_PRINT("\n>>> Displaying norm2 ...\n")
132  SGMatrix< float64_t >::display_matrix(norm2.data(), num_vecs, m_num_classes);
133 #endif
134 
135  CMulticlassLabels* out = new CMulticlassLabels(num_vecs);
136 
137  for (int i = 0 ; i < num_vecs; i++)
138  out->set_label(i, CMath::arg_max(norm2.data()+i, num_vecs, m_num_classes));
139 
140  return out;
141 }
142 
144 {
145  if (!m_labels)
146  SG_ERROR("No labels allocated in QDA training\n")
147 
148  if ( data )
149  {
150  if (!data->has_property(FP_DOT))
151  SG_ERROR("Speficied features are not of type CDotFeatures\n")
152 
153  set_features((CDotFeatures*) data);
154  }
155 
156  if (!m_features)
157  SG_ERROR("No features allocated in QDA training\n")
158 
159  SGVector< int32_t > train_labels = ((CMulticlassLabels*) m_labels)->get_int_labels();
160 
161  if (!train_labels.vector)
162  SG_ERROR("No train_labels allocated in QDA training\n")
163 
164  cleanup();
165 
166  m_num_classes = ((CMulticlassLabels*) m_labels)->get_num_classes();
167  m_dim = m_features->get_dim_feature_space();
168  int32_t num_vec = m_features->get_num_vectors();
169 
170  if (num_vec != train_labels.vlen)
171  SG_ERROR("Dimension mismatch between features and labels in QDA training")
172 
173  int32_t* class_idxs = SG_MALLOC(int32_t, num_vec*m_num_classes); // number of examples of each class
174  int32_t* class_nums = SG_MALLOC(int32_t, m_num_classes);
175  memset(class_nums, 0, m_num_classes*sizeof(int32_t));
176  int32_t class_idx;
177 
178  for (int i = 0; i < train_labels.vlen; i++)
179  {
180  class_idx = train_labels.vector[i];
181 
182  if (class_idx < 0 || class_idx >= m_num_classes)
183  {
184  SG_ERROR("found label out of {0, 1, 2, ..., num_classes-1}...")
185  return false;
186  }
187  else
188  {
189  class_idxs[ class_idx*num_vec + class_nums[class_idx]++ ] = i;
190  }
191  }
192 
193  for (int i = 0; i < m_num_classes; i++)
194  {
195  if (class_nums[i] <= 0)
196  {
197  SG_ERROR("What? One class with no elements\n")
198  return false;
199  }
200  }
201 
202  if (m_store_covs)
203  {
204  // cov_dims will be free in m_covs.destroy_ndarray()
205  index_t * cov_dims = SG_MALLOC(index_t, 3);
206  cov_dims[0] = m_dim;
207  cov_dims[1] = m_dim;
208  cov_dims[2] = m_num_classes;
209  m_covs = SGNDArray< float64_t >(cov_dims, 3);
210  }
211 
212  m_means = SGMatrix< float64_t >(m_dim, m_num_classes, true);
213  SGMatrix< float64_t > scalings = SGMatrix< float64_t >(m_dim, m_num_classes);
214 
215  // rot_dims will be freed in rotations.destroy_ndarray()
216  index_t* rot_dims = SG_MALLOC(index_t, 3);
217  rot_dims[0] = m_dim;
218  rot_dims[1] = m_dim;
219  rot_dims[2] = m_num_classes;
220  SGNDArray< float64_t > rotations = SGNDArray< float64_t >(rot_dims, 3);
221 
223 
224  m_means.zero();
225 
226  int32_t vlen;
227  bool vfree;
228  float64_t* vec;
229  for (int k = 0; k < m_num_classes; k++)
230  {
231  MatrixXd buffer(class_nums[k], m_dim);
232  Map< VectorXd > Em_means(m_means.get_column_vector(k), m_dim);
233  for (int i = 0; i < class_nums[k]; i++)
234  {
235  vec = rf->get_feature_vector(class_idxs[k*num_vec + i], vlen, vfree);
236  ASSERT(vec)
237 
238  Map< VectorXd > Evec(vec, vlen);
239  Em_means += Evec;
240  buffer.row(i) = Evec;
241 
242  rf->free_feature_vector(vec, class_idxs[k*num_vec + i], vfree);
243  }
244 
245  Em_means /= class_nums[k];
246 
247  for (int i = 0; i < class_nums[k]; i++)
248  buffer.row(i) -= Em_means;
249 
250  // SVD
251  float64_t * col = scalings.get_column_vector(k);
252  float64_t * rot_mat = rotations.get_matrix(k);
253 
254  Eigen::JacobiSVD<MatrixXd> eSvd;
255  eSvd.compute(buffer,Eigen::ComputeFullV);
256  memcpy(col, eSvd.singularValues().data(), m_dim*sizeof(float64_t));
257  memcpy(rot_mat, eSvd.matrixV().data(), m_dim*m_dim*sizeof(float64_t));
258 
259  SGVector<float64_t>::vector_multiply(col, col, col, m_dim);
260  SGVector<float64_t>::scale_vector(1.0/(class_nums[k]-1), col, m_dim);
261 
262  if (m_store_covs)
263  {
264  SGMatrix< float64_t > M(m_dim ,m_dim);
265  MatrixXd MEig = Map<MatrixXd>(rot_mat,m_dim,m_dim);
266  for (int i = 0; i < m_dim; i++)
267  for (int j = 0; j < m_dim; j++)
268  M(i,j)*=scalings[k*m_dim + j];
269  MatrixXd rotE = Map<MatrixXd>(rot_mat,m_dim,m_dim);
270  MatrixXd resE(m_dim,m_dim);
271  resE = MEig * rotE.transpose();
272  memcpy(m_covs.get_matrix(k),resE.data(),m_dim*m_dim*sizeof(float64_t));
273  }
274  }
275 
276  /* Computation of terms required for classification */
277  SGVector< float32_t > sinvsqrt(m_dim);
278 
279  // M_dims will be freed in m_M.destroy_ndarray()
280  index_t* M_dims = SG_MALLOC(index_t, 3);
281  M_dims[0] = m_dim;
282  M_dims[1] = m_dim;
283  M_dims[2] = m_num_classes;
284  m_M = SGNDArray< float64_t >(M_dims, 3);
285 
286  m_slog = SGVector< float32_t >(m_num_classes);
287  m_slog.zero();
288 
289  index_t idx = 0;
290  for (int k = 0; k < m_num_classes; k++)
291  {
292  for (int j = 0; j < m_dim; j++)
293  {
294  sinvsqrt[j] = 1.0 / CMath::sqrt(scalings[k*m_dim + j]);
295  m_slog[k] += CMath::log(scalings[k*m_dim + j]);
296  }
297 
298  for (int i = 0; i < m_dim; i++)
299  for (int j = 0; j < m_dim; j++)
300  {
301  idx = k*m_dim*m_dim + i + j*m_dim;
302  m_M[idx] = rotations[idx] * sinvsqrt[j];
303  }
304  }
305 
306 #ifdef DEBUG_QDA
307  SG_PRINT(">>> QDA machine trained with %d classes\n", m_num_classes)
308 
309  SG_PRINT("\n>>> Displaying means ...\n")
310  SGMatrix< float64_t >::display_matrix(m_means.matrix, m_dim, m_num_classes);
311 
312  SG_PRINT("\n>>> Displaying scalings ...\n")
313  SGMatrix< float64_t >::display_matrix(scalings.matrix, m_dim, m_num_classes);
314 
315  SG_PRINT("\n>>> Displaying rotations ... \n")
316  for (int k = 0; k < m_num_classes; k++)
317  SGMatrix< float64_t >::display_matrix(rotations.get_matrix(k), m_dim, m_dim);
318 
319  SG_PRINT("\n>>> Displaying sinvsqrt ... \n")
320  sinvsqrt.display_vector();
321 
322  SG_PRINT("\n>>> Diplaying m_M matrices ... \n")
323  for (int k = 0; k < m_num_classes; k++)
325 
326  SG_PRINT("\n>>> Exit DEBUG_QDA\n")
327 #endif
328 
329  SG_FREE(class_idxs);
330  SG_FREE(class_nums);
331  return true;
332 }
333 
void display_matrix(const char *name="matrix") const
Definition: SGMatrix.cpp:390
static int32_t arg_max(T *vec, int32_t inc, int32_t len, T *maxv_ptr=NULL)
Definition: Math.h:262
experimental abstract native multiclass machine class
int32_t index_t
Definition: common.h:62
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
Definition: SGMatrix.h:20
virtual int32_t get_num_vectors() const =0
CLabels * m_labels
Definition: Machine.h:361
#define SG_ERROR(...)
Definition: SGIO.h:129
Features that support dot products among other operations.
Definition: DotFeatures.h:44
T * get_matrix(index_t matIdx) const
Definition: SGNDArray.h:72
virtual int32_t get_dim_feature_space() const =0
static void scale_vector(T alpha, T *vec, int32_t len)
Scale vector inplace.
Definition: SGVector.cpp:820
void display_vector(const char *name="vector", const char *prefix="") const
Definition: SGVector.cpp:354
Multiclass Labels for multi-class classification.
index_t vlen
Definition: SGVector.h:492
#define SG_PRINT(...)
Definition: SGIO.h:137
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
virtual CMulticlassLabels * apply_multiclass(CFeatures *data=NULL)
Definition: QDA.cpp:68
static void vector_multiply(T *target, const T *v1, const T *v2, int32_t len)
Compute vector multiplication.
Definition: SGVector.h:324
double float64_t
Definition: common.h:50
T * get_column_vector(index_t col) const
Definition: SGMatrix.h:113
#define SG_UNREF(x)
Definition: SGObject.h:52
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual void set_features(CDotFeatures *feat)
Definition: QDA.h:99
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual ~CQDA()
Definition: QDA.cpp:41
static float64_t log(float64_t v)
Definition: Math.h:922
virtual bool train_machine(CFeatures *data=NULL)
Definition: QDA.cpp:143
#define SG_ADD(...)
Definition: SGObject.h:81
static float32_t sqrt(float32_t x)
Definition: Math.h:459
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
CQDA(float64_t tolerance=1e-4, bool store_covs=false)
Definition: QDA.cpp:26
virtual void set_labels(CLabels *lab)

SHOGUN Machine Learning Toolbox - Documentation