SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LMNNImpl.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Fernando J. Iglesias Garcia
8  * Copyright (C) 2013 Fernando J. Iglesias Garcia
9  */
10 
11 #ifdef HAVE_EIGEN3
12 
13 #include <shogun/metric/LMNNImpl.h>
14 #include <shogun/multiclass/KNN.h>
17 
18 #include <iterator>
19 
21 
22 // column-wise sum of the squared elements of a matrix
23 #define SUMSQCOLS(A) ((A).array().square().colwise().sum())
24 
25 using namespace shogun;
26 using namespace Eigen;
27 
28 CImpostorNode::CImpostorNode(index_t ex, index_t tar, index_t imp)
29 : example(ex), target(tar), impostor(imp)
30 {
31 }
32 
33 bool CImpostorNode::operator<(const CImpostorNode& rhs) const
34 {
35  if (example == rhs.example)
36  {
37  if (target == rhs.target)
38  return impostor < rhs.impostor;
39  else
40  return target < rhs.target;
41  }
42  else
43  return example < rhs.example;
44 }
45 
46 void CLMNNImpl::check_training_setup(CFeatures* features, const CLabels* labels,
47  SGMatrix<float64_t>& init_transform)
48 {
49  REQUIRE(features->has_property(FP_DOT),
50  "LMNN can only be applied to features that support dot products\n")
52  "LMNN supports only MulticlassLabels\n")
53  REQUIRE(labels->get_num_labels()==features->get_num_vectors(),
54  "The number of feature vectors must be equal to the number of labels\n")
55  //FIXME this requirement should be dropped in the future
56  REQUIRE(features->get_feature_class()==C_DENSE,
57  "Currently, LMNN supports only DenseFeatures\n")
58 
59  // cast is safe, we ensure above that features are dense
60  CDenseFeatures<float64_t>* x = static_cast<CDenseFeatures<float64_t>*>(features);
61 
63  if (init_transform.num_rows==0)
64  init_transform = CLMNNImpl::compute_pca_transform(x);
65 
66  REQUIRE(init_transform.num_rows==x->get_num_features() &&
67  init_transform.num_rows==init_transform.num_cols,
68  "The initial transform must be a square matrix of size equal to the "
69  "number of features\n")
70 }
71 
72 SGMatrix<index_t> CLMNNImpl::find_target_nn(CDenseFeatures<float64_t>* x,
73  CMulticlassLabels* y, int32_t k)
74 {
75  SG_SDEBUG("Entering CLMNNImpl::find_target_nn().\n")
76 
77  // get the number of features
78  int32_t d = x->get_num_features();
79  SGMatrix<index_t> target_neighbors(k, x->get_num_vectors());
80  SGVector<float64_t> unique_labels = y->get_unique_labels();
81  CDenseFeatures<float64_t>* features_slice = new CDenseFeatures<float64_t>();
82  CMulticlassLabels* labels_slice = new CMulticlassLabels();
83  SGVector<index_t> idxsmap(x->get_num_vectors());
84 
85  // increase ref count because a KNN instance will be created for each slice, and it
86  // decreses the ref count of its labels and features when destroyed
87  SG_REF(features_slice)
88  SG_REF(labels_slice)
89 
90  for (index_t i = 0; i < unique_labels.vlen; ++i)
91  {
92  int32_t slice_size = 0;
93  for (int32_t j = 0; j < y->get_num_labels(); ++j)
94  {
95  if (y->get_label(j)==unique_labels[i])
96  {
97  idxsmap[slice_size] = j;
98  ++slice_size;
99  }
100  }
101 
102  MatrixXd slice_mat(d, slice_size);
103  for (int32_t j = 0; j < slice_size; ++j)
104  slice_mat.col(j) = Map<const VectorXd>(x->get_feature_vector(idxsmap[j]).vector, d);
105 
106  features_slice->set_feature_matrix(SGMatrix<float64_t>(slice_mat.data(), d, slice_size, false));
107 
108  //FIXME the labels are not actually necessary to get the nearest neighbors, the
109  //features suffice. The labels are needed when we want to classify.
110  SGVector<float64_t> labels_vec(slice_size);
111  labels_vec.set_const(unique_labels[i]);
112  labels_slice->set_labels(labels_vec);
113 
114  CKNN* knn = new CKNN(k+1, new CEuclideanDistance(features_slice, features_slice), labels_slice);
115  SGMatrix<int32_t> target_slice = knn->nearest_neighbors();
116  // sanity check
117  ASSERT(target_slice.num_rows==k+1 && target_slice.num_cols==slice_size)
118 
119  for (index_t j = 0; j < target_slice.num_cols; ++j)
120  {
121  for (index_t l = 1; l < target_slice.num_rows; ++l)
122  target_neighbors(l-1, idxsmap[j]) = idxsmap[ target_slice(l,j) ];
123  }
124 
125  // clean up knn
126  SG_UNREF(knn)
127  }
128 
129  // clean up features and labels
130  SG_UNREF(features_slice)
131  SG_UNREF(labels_slice)
132 
133  SG_SDEBUG("Leaving CLMNNImpl::find_target_nn().\n")
134 
135  return target_neighbors;
136 }
137 
138 MatrixXd CLMNNImpl::sum_outer_products(CDenseFeatures<float64_t>* x, const SGMatrix<index_t> target_nn)
139 {
140  // get the number of features
141  int32_t d = x->get_num_features();
142  // initialize the sum of outer products (sop)
143  MatrixXd sop(d,d);
144  sop.setZero();
145  // map the feature matrix (each column is a feature vector) to an Eigen matrix
146  Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, x->get_num_vectors());
147 
148  // sum the outer products stored in C using the indices specified in target_nn
149  for (index_t i = 0; i < target_nn.num_cols; ++i)
150  {
151  for (index_t j = 0; j < target_nn.num_rows; ++j)
152  {
153  VectorXd dx = X.col(i) - X.col(target_nn(j,i));
154  sop += dx*dx.transpose();
155  }
156  }
157 
158  return sop;
159 }
160 
161 ImpostorsSetType CLMNNImpl::find_impostors(CDenseFeatures<float64_t>* x,
162  CMulticlassLabels* y, const MatrixXd& L, const SGMatrix<index_t> target_nn,
163  const uint32_t iter, const uint32_t correction)
164 {
165  SG_SDEBUG("Entering CLMNNImpl::find_impostors().\n")
166 
167  // get the number of examples from data
168  int32_t n = x->get_num_vectors();
169  // get the number of features
170  int32_t d = x->get_num_features();
171  // get the number of neighbors
172  int32_t k = target_nn.num_rows;
173 
174  // map the feature matrix (each column is a feature vector) to an Eigen matrix
175  Map<const MatrixXd> X(x->get_feature_matrix().matrix, d, n);
176  // transform the feature vectors
177  MatrixXd LX = L*X;
178 
179  // compute square distances plus margin from examples to target neighbors
180  MatrixXd sqdists = CLMNNImpl::compute_sqdists(LX,target_nn);
181 
182  // initialize impostors set
183  ImpostorsSetType N;
184  // exact impostors set shared between calls to this method
185  static ImpostorsSetType Nexact;
186 
187  // impostors search
188  REQUIRE(correction>0, "The number of iterations between exact updates of the "
189  "impostors set must be greater than 0\n")
190  if ((iter % correction)==0)
191  {
192  Nexact = CLMNNImpl::find_impostors_exact(LX, sqdists, y, target_nn, k);
193  N = Nexact;
194  }
195  else
196  {
197  N = CLMNNImpl::find_impostors_approx(LX, sqdists, Nexact, target_nn);
198  }
199 
200  SG_SDEBUG("Leaving CLMNNImpl::find_impostors().\n")
201 
202  return N;
203 }
204 
205 void CLMNNImpl::update_gradient(CDenseFeatures<float64_t>* x, MatrixXd& G,
206  const ImpostorsSetType& Nc, const ImpostorsSetType& Np, float64_t regularization)
207 {
208  // compute the difference sets
209  ImpostorsSetType Np_Nc, Nc_Np;
210  set_difference(Np.begin(), Np.end(), Nc.begin(), Nc.end(), inserter(Np_Nc, Np_Nc.begin()));
211  set_difference(Nc.begin(), Nc.end(), Np.begin(), Np.end(), inserter(Nc_Np, Nc_Np.begin()));
212 
213  // map the feature matrix (each column is a feature vector) to an Eigen matrix
214  Map<const MatrixXd> X(x->get_feature_matrix().matrix, x->get_num_features(), x->get_num_vectors());
215 
216  // remove the gradient contributions of the impostors that were in the previous
217  // set but disappeared in the current
218  for (ImpostorsSetType::iterator it = Np_Nc.begin(); it != Np_Nc.end(); ++it)
219  {
220  VectorXd dx1 = X.col(it->example) - X.col(it->target);
221  VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
222  G -= regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
223  }
224 
225  // add the gradient contributions of the new impostors
226  for (ImpostorsSetType::iterator it = Nc_Np.begin(); it != Nc_Np.end(); ++it)
227  {
228  VectorXd dx1 = X.col(it->example) - X.col(it->target);
229  VectorXd dx2 = X.col(it->example) - X.col(it->impostor);
230  G += regularization*(dx1*dx1.transpose() - dx2*dx2.transpose());
231  }
232 }
233 
234 void CLMNNImpl::gradient_step(MatrixXd& L, const MatrixXd& G, float64_t stepsize, bool diagonal)
235 {
236  if (diagonal)
237  {
238  // compute M as the square of L
239  MatrixXd M = L.transpose()*L;
240  // do step in M along the gradient direction
241  M -= stepsize*G;
242  // keep only the elements in the diagonal of M
243  VectorXd m = M.diagonal();
244 
245  VectorXd zero;
246  zero.resize(m.size());
247  zero.setZero();
248 
249  // return to representation in L
250  VectorXd l = m.array().max(zero.array()).array().sqrt();
251  L = l.asDiagonal();
252  }
253  else
254  {
255  // do step in L along the gradient direction (no need to project M then)
256  L -= stepsize*(2*L*G);
257  }
258 }
259 
260 void CLMNNImpl::correct_stepsize(float64_t& stepsize, const SGVector<float64_t> obj, const uint32_t iter)
261 {
262  if (iter > 0)
263  {
264  // Difference between current and previous objective
265  float64_t delta = obj[iter] - obj[iter-1];
266 
267  if (delta > 0)
268  {
269  // The objective has increased, we have probably jumped over the optimum,
270  // thus, decrease the step size
271  stepsize *= 0.5;
272  }
273  else
274  {
275  // The objective has decreased, we are in the right direction,
276  // increase the step size
277  stepsize *= 1.01;
278  }
279  }
280 }
281 
282 bool CLMNNImpl::check_termination(float64_t stepsize, const SGVector<float64_t> obj, uint32_t iter, uint32_t maxiter, float64_t stepsize_threshold, float64_t obj_threshold)
283 {
284  if (iter >= maxiter-1)
285  {
286  SG_SWARNING("Maximum number of iterations reached before convergence.");
287  return true;
288  }
289 
290  if (stepsize < stepsize_threshold)
291  {
292  SG_SDEBUG("Step size too small to make more progress. Convergence reached.\n");
293  return true;
294  }
295 
296  if (iter >= 10)
297  {
298  for (int32_t i = 0; i < 3; ++i)
299  {
300  if (CMath::abs(obj[iter-i]-obj[iter-i-1]) >= obj_threshold)
301  return false;
302  }
303 
304  SG_SDEBUG("No more progress in the objective. Convergence reached.\n");
305  return true;
306  }
307 
308  // For the rest of the cases, do not stop LMNN.
309  return false;
310 }
311 
312 SGMatrix<float64_t> CLMNNImpl::compute_pca_transform(CDenseFeatures<float64_t>* features)
313 {
314  SG_SDEBUG("Initializing LMNN transform using PCA.\n");
315 
316  // Substract the mean of the features
317  // Create clone of the features to keep the input features unmodified
318  CDenseFeatures<float64_t>* cloned_features =
320  CPruneVarSubMean* mean_substractor =
321  new CPruneVarSubMean(false); // false to avoid variance normalization
322  mean_substractor->init(cloned_features);
323  mean_substractor->apply_to_feature_matrix(cloned_features);
324 
325  // Obtain the linear transform applying PCA
326  CPCA* pca = new CPCA();
327  pca->set_target_dim(cloned_features->get_num_features());
328  pca->init(cloned_features);
329  SGMatrix<float64_t> pca_transform = pca->get_transformation_matrix();
330 
331  SG_UNREF(pca);
332  SG_UNREF(mean_substractor);
333  SG_UNREF(cloned_features);
334 
335  return pca_transform;
336 }
337 
338 MatrixXd CLMNNImpl::compute_sqdists(MatrixXd& LX, const SGMatrix<index_t> target_nn)
339 {
340  // get the number of examples
341  ASSERT(LX.cols()==target_nn.num_cols)
342  int32_t n = LX.cols();
343  // get the number of features
344  int32_t d = LX.rows();
345  // get the number of neighbors
346  int32_t k = target_nn.num_rows;
347 
349 
350  // create Shogun features from LX to later apply subset
351  SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
353 
354  // initialize distances
355  MatrixXd sqdists(k,n);
356  sqdists.setZero();
357  for (int32_t i = 0; i < k; ++i)
358  {
359  //FIXME avoid copying the rows of target_nn and access them directly. Maybe
360  //find_target_nn should be changed to return the output transposed wrt how it is
361  //done atm.
362  SGVector<index_t> subset_vec = target_nn.get_row_vector(i);
363  lx->add_subset(subset_vec);
364  // after the subset, there are still n columns, i.e. the subset is used to
365  // modify the order of the columns in x according to the target neighbors
366  sqdists.row(i) = SUMSQCOLS(LX - Map<const MatrixXd>(lx->get_feature_matrix().matrix, d, n)) + 1;
367  lx->remove_subset();
368  }
369 
370  // clean up features used to apply subset
371  SG_UNREF(lx);
372 
373  return sqdists;
374 }
375 
376 ImpostorsSetType CLMNNImpl::find_impostors_exact(MatrixXd& LX, const MatrixXd& sqdists,
377  CMulticlassLabels* y, const SGMatrix<index_t> target_nn, int32_t k)
378 {
379  SG_SDEBUG("Entering CLMNNImpl::find_impostors_exact().\n")
380 
381  // initialize empty impostors set
382  ImpostorsSetType N = ImpostorsSetType();
383 
384  // get the number of examples from data
385  int32_t n = LX.cols();
386  // get the number of features
387  int32_t d = LX.rows();
388  // create Shogun features from LX to later apply subset
389  SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
390  CDenseFeatures<float64_t>* lx = new CDenseFeatures<float64_t>(lx_mat);
391 
392  // get a vector with unique label values
393  SGVector<float64_t> unique = y->get_unique_labels();
394 
395  // for each label except from the largest one
396  for (index_t i = 0; i < unique.vlen-1; ++i)
397  {
398  // get the indices of the examples labelled as unique[i]
399  std::vector<index_t> iidxs = CLMNNImpl::get_examples_label(y,unique[i]);
400  // get the indices of the examples that have a larger label value, so that
401  // pairwise distances are computed once
402  std::vector<index_t> gtidxs = CLMNNImpl::get_examples_gtlabel(y,unique[i]);
403 
404  // get distance with features indexed by iidxs and gtidxs separated
405  CEuclideanDistance* euclidean = CLMNNImpl::setup_distance(lx,iidxs,gtidxs);
406  euclidean->set_disable_sqrt(true);
407 
408  for (int32_t j = 0; j < k; ++j)
409  {
410  for (std::size_t ii = 0; ii < iidxs.size(); ++ii)
411  {
412  for (std::size_t jj = 0; jj < gtidxs.size(); ++jj)
413  {
414  // FIXME study if using upper bounded distances can be an improvement
415  float64_t distance = euclidean->distance(ii,jj);
416 
417  if (distance <= sqdists(j,iidxs[ii]))
418  N.insert( CImpostorNode(iidxs[ii], target_nn(j,iidxs[ii]), gtidxs[jj]) );
419 
420  if (distance <= sqdists(j,gtidxs[jj]))
421  N.insert( CImpostorNode(gtidxs[jj], target_nn(j,gtidxs[jj]), iidxs[ii]) );
422  }
423  }
424  }
425 
426  SG_UNREF(euclidean);
427  }
428 
429  SG_UNREF(lx);
430 
431  SG_SDEBUG("Leaving CLMNNImpl::find_impostors_exact().\n")
432 
433  return N;
434 }
435 
436 ImpostorsSetType CLMNNImpl::find_impostors_approx(MatrixXd& LX, const MatrixXd& sqdists,
437  const ImpostorsSetType& Nexact, const SGMatrix<index_t> target_nn)
438 {
439  SG_SDEBUG("Entering CLMNNImpl::find_impostors_approx().\n")
440 
441  // initialize empty impostors set
442  ImpostorsSetType N = ImpostorsSetType();
443 
444  // compute square distances from examples to impostors
445  SGVector<float64_t> impostors_sqdists = CLMNNImpl::compute_impostors_sqdists(LX,Nexact);
446 
447  // find in the exact set of impostors computed last, the triplets that remain impostors
448  index_t i = 0;
449  for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
450  {
451  // find in target_nn(:,it->example) the position of the target neighbor it->target
452  index_t target_idx = 0;
453  while (target_nn(target_idx, it->example)!=it->target && target_idx<target_nn.num_rows)
454  ++target_idx;
455 
456  REQUIRE(target_idx<target_nn.num_rows, "The index of the target neighbour in the "
457  "impostors set was not found in the target neighbours matrix. "
458  "There must be a bug in find_impostors_exact.\n")
459 
460  if ( impostors_sqdists[i++] <= sqdists(target_idx, it->example) )
461  N.insert(*it);
462  }
463 
464  SG_SDEBUG("Leaving CLMNNImpl::find_impostors_approx().\n")
465 
466  return N;
467 }
468 
469 SGVector<float64_t> CLMNNImpl::compute_impostors_sqdists(MatrixXd& LX, const ImpostorsSetType& Nexact)
470 {
471  // get the number of examples
472  int32_t n = LX.cols();
473  // get the number of features
474  int32_t d = LX.rows();
475  // get the number of impostors
476  size_t num_impostors = Nexact.size();
477 
479 
480  // create Shogun features from LX and distance
481  SGMatrix<float64_t> lx_mat(LX.data(), d, n, false);
483  CEuclideanDistance* euclidean = new CEuclideanDistance(lx,lx);
484  euclidean->set_disable_sqrt(true);
485 
486  // initialize vector of square distances
487  SGVector<float64_t> sqdists(num_impostors);
488  // compute square distances
489  index_t i = 0;
490  for (ImpostorsSetType::iterator it = Nexact.begin(); it != Nexact.end(); ++it)
491  sqdists[i++] = euclidean->distance(it->example,it->impostor);
492 
493  // clean up distance
494  SG_UNREF(euclidean);
495 
496  return sqdists;
497 }
498 
499 std::vector<index_t> CLMNNImpl::get_examples_label(CMulticlassLabels* y,
500  float64_t yi)
501 {
502  // indices of the examples with label equal to yi
503  std::vector<index_t> idxs;
504 
505  for (index_t i = 0; i < index_t(y->get_num_labels()); ++i)
506  {
507  if (y->get_label(i) == yi)
508  idxs.push_back(i);
509  }
510 
511  return idxs;
512 }
513 
514 std::vector<index_t> CLMNNImpl::get_examples_gtlabel(CMulticlassLabels* y,
515  float64_t yi)
516 {
517  // indices of the examples with label equal greater than yi
518  std::vector<index_t> idxs;
519 
520  for (index_t i = 0; i < index_t(y->get_num_labels()); ++i)
521  {
522  if (y->get_label(i) > yi)
523  idxs.push_back(i);
524  }
525 
526  return idxs;
527 }
528 
529 CEuclideanDistance* CLMNNImpl::setup_distance(CDenseFeatures<float64_t>* x,
530  std::vector<index_t>& a, std::vector<index_t>& b)
531 {
532  // create new features only containing examples whose indices are in a
533  x->add_subset(SGVector<index_t>(a.data(), a.size(), false));
535  x->remove_subset();
536 
537  // create new features only containing examples whose indices are in b
538  x->add_subset(SGVector<index_t>(b.data(), b.size(), false));
540  x->remove_subset();
541 
542  // create and return distance
543  CEuclideanDistance* euclidean = new CEuclideanDistance(afeats,bfeats);
544  return euclidean;
545 }
546 
547 #endif /* HAVE_EIGEN3 */

SHOGUN Machine Learning Toolbox - Documentation