SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
slep_mc_plain_lr.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Sergey Lisitsyn
8  * Copyright (C) 2010-2012 Jun Liu, Jieping Ye
9  */
10 
12 #ifdef HAVE_EIGEN3
16 #include <shogun/lib/Signal.h>
17 #include <shogun/lib/Time.h>
18 #include <iostream>
19 
20 using namespace shogun;
21 using namespace Eigen;
22 using namespace std;
23 
24 namespace shogun
25 {
26 
// Multiclass "plain" (non-tree-structured) logistic regression solver from
// the SLEP (Sparse Learning with Efficient Projections) package.
//
// Minimizes, over the weight matrix w (n_feats x n_classes) and intercepts c
// (n_classes), the sum over all (vector, class) pairs of a one-vs-rest
// logistic loss -- the true class of a vector is encoded with sign -1 and
// every other class with +1 (see the `aa` computation below) -- plus the
// regularizer lambda * sum over classes of the per-class column norm
// (computed via w.col(m).norm() when reporting the objective; the proximal
// step itself is delegated to eppMatrix with exponent options.q).
//
// The optimization scheme is an accelerated proximal gradient method: the
// alpha/beta updates below match the Nesterov/FISTA momentum recurrence, and
// the inner loop is a backtracking search that grows the Lipschitz estimate L
// until the standard sufficient-decrease condition (l_sum <= r_sum*L) holds.
//
// @param features dot-product feature container supplying n_vecs vectors of
//        dimension n_feats (accessed via dense_dot_range / add_to_dense_vec)
// @param labels   multiclass labels; values are assumed to be exact integers
//        in [0, n_classes) -- they are truncated to int below
// @param z        regularization strength; used directly as lambda
// @param options  solver settings read here: q (projection norm), tolerance,
//        max_iter, and an optional warm start in options.last_result
// @return slep_result_t carrying the learned weights (n_feats x n_classes)
//         and the intercept vector (n_classes)
27 slep_result_t slep_mc_plain_lr(
28  CDotFeatures* features,
29  CMulticlassLabels* labels,
30  float64_t z,
31  const slep_options& options)
32 {
33  int i,j;
34  // obtain problem parameters
35  int n_feats = features->get_dim_feature_space();
36  int n_vecs = features->get_num_vectors();
37  int n_classes = labels->get_num_classes();
38 
39  // labels vector containing values in range (0 .. n_classes)
40  SGVector<float64_t> labels_vector = labels->get_labels();
41 
42  // initialize matrices and vectors to be used
43  // weight vector
44  MatrixXd w = MatrixXd::Zero(n_feats, n_classes);
45  // intercepts (biases)
46  VectorXd c = VectorXd::Zero(n_classes);
47 
   // warm start: copy weights/intercepts from a previous solve if provided
48  if (options.last_result)
49  {
50  SGMatrix<float64_t> last_w = options.last_result->w;
51  SGVector<float64_t> last_c = options.last_result->c;
52  for (i=0; i<n_classes; i++)
53  {
54  c[i] = last_c[i];
55  for (j=0; j<n_feats; j++)
56  w(j,i) = last_w(j,i);
57  }
58  }
59  // iterative process matrices and vectors
   // wp/cp hold the previous iterate; wwp/ccp hold the momentum differences
60  MatrixXd wp = w, wwp = MatrixXd::Zero(n_feats, n_classes);
61  VectorXd cp = c, ccp = VectorXd::Zero(n_classes);
62  // search point weight vector
63  MatrixXd search_w = MatrixXd::Zero(n_feats, n_classes);
64  // search point intercepts
65  VectorXd search_c = VectorXd::Zero(n_classes);
66  // dot products
   // Aw caches features * w (one column per class) so the loss/gradient
   // loops below never recompute full dot products per element
67  MatrixXd Aw = MatrixXd::Zero(n_vecs, n_classes);
68  for (j=0; j<n_classes; j++)
69  features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);
70  MatrixXd As = MatrixXd::Zero(n_vecs, n_classes);
71  MatrixXd Awp = MatrixXd::Zero(n_vecs, n_classes);
72  // gradients
73  MatrixXd g = MatrixXd::Zero(n_feats, n_classes);
74  VectorXd gc = VectorXd::Zero(n_classes);
75  // projection
76  MatrixXd v = MatrixXd::Zero(n_feats, n_classes);
77 
78  // Lipschitz continuous gradient parameter for line search
   // (initial guess only; the backtracking loop below enlarges it as needed)
79  double L = 1.0/(n_vecs*n_classes);
80  // coefficients for search point computation
81  double alphap = 0, alpha = 1;
82 
83  // lambda regularization parameter
84  double lambda = z;
85  // objective values
86  double objective = 0.0;
87  double objective_p = 0.0;
88 
89  int iter = 0;
90  bool done = false;
91  CTime time;
92  //internal::set_is_malloc_allowed(false);
   // outer accelerated-gradient loop; also stops on user cancellation
93  while ((!done) && (iter<options.max_iter) && (!CSignal::cancel_computations()))
94  {
   // FISTA momentum coefficient built from the alpha recurrence (line 189)
95  double beta = (alphap-1)/alpha;
96  // compute search points
97  search_w = w + beta*wwp;
98  search_c = c + beta*ccp;
99 
100  // update dot products with search point
    // extrapolates cached products instead of recomputing features*search_w
101  As = Aw + beta*(Aw-Awp);
102 
103  // compute objective and gradient at search point
104  double fun_s = 0;
105  g.setZero();
106  gc.setZero();
107  // for each vector
108  for (i=0; i<n_vecs; i++)
109  {
110  // class of current vector
    // NOTE(review): implicit float64 -> int truncation; assumes labels
    // are stored as exact integers
111  int vec_class = labels_vector[i];
112  // for each class
113  for (j=0; j<n_classes; j++)
114  {
115  // compute logistic loss
    // margin with one-vs-rest sign: -1 for the true class, +1 otherwise
116  double aa = ((vec_class == j) ? -1.0 : 1.0)*(As(i,j) + search_c(j));
117  double bb = aa > 0.0 ? aa : 0.0;
118  // avoid underflow via log-sum-exp trick
119  fun_s += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
    // logistic sigmoid of -aa; (1-prob) below is the loss derivative w.r.t. aa
120  double prob = 1.0/(1+CMath::exp(aa));
121  double b = ((vec_class == j) ? -1.0 : 1.0)*(1-prob);
122  // update gradient of intercepts
123  gc[j] += b;
124  // update gradient of weight vectors
    // accumulates b * x_i into column j of g
125  features->add_to_dense_vec(b, i, g.col(j).data(), n_feats);
126  }
127  }
128  //fun_s /= (n_vecs*n_classes);
129 
    // remember current iterate before the proximal step overwrites w/c
130  wp = w;
131  Awp = Aw;
132  cp = c;
133 
134  int inner_iter = 0;
135  double fun_x = 0;
136 
137  // line search process
    // backtracking on L, hard-capped at 5000 trials per outer iteration
138  while (inner_iter<5000)
139  {
140  // compute line search point
    // plain gradient step with step size 1/L (v is a scratch buffer here)
141  v = search_w - g/L;
142  c = search_c - gc/L;
143 
144  // compute projection of gradient
    // proximal operator from SLEP: writes the projected point into w,
    // reading the gradient-step point from v, with threshold lambda/L
    // and norm exponent options.q (exact semantics defined by eppMatrix)
145  eppMatrix(w.data(),v.data(),n_feats,n_classes,lambda/L,options.q);
146 
    // v now holds the step actually taken from the search point
147  v = w - search_w;
148 
149  // update dot products
150  for (j=0; j<n_classes; j++)
151  features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);
152 
153  // compute objective at search point
    // same log-sum-exp logistic loss as above, evaluated at the new (w,c)
154  fun_x = 0;
155  for (i=0; i<n_vecs; i++)
156  {
157  int vec_class = labels_vector[i];
158  for (j=0; j<n_classes; j++)
159  {
160  double aa = ((vec_class == j) ? -1.0 : 1.0)*(Aw(i,j) + c(j));
161  double bb = aa > 0.0 ? aa : 0.0;
162  fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
163  }
164  }
165  //fun_x /= (n_vecs*n_classes);
166 
167  // check for termination of line search
    // r_sum = half squared norm of the step; l_sum = loss increase minus
    // its first-order (linear) model -- the curvature actually observed
168  double r_sum = (v.squaredNorm() + (c-search_c).squaredNorm())/2;
169  double l_sum = fun_x - fun_s - v.cwiseProduct(g).sum() - (c-search_c).dot(gc);
170 
171  // stop if projected gradient is less than 1e-20
172  if (r_sum <= 1e-20)
173  {
174  SG_SINFO("Gradient step makes little improvement (%f)\n",r_sum)
175  done = true;
176  break;
177  }
178 
    // sufficient decrease holds -> L is a valid Lipschitz estimate;
    // otherwise at least double L (or jump to the observed curvature)
179  if (l_sum <= r_sum*L)
180  break;
181  else
182  L = CMath::max(2*L, l_sum/r_sum);
183 
184  inner_iter++;
185  }
186 
187  // update alpha coefficients
    // standard FISTA recurrence: alpha_{k+1} = (1+sqrt(1+4*alpha_k^2))/2
188  alphap = alpha;
189  alpha = (1+CMath::sqrt(4*alpha*alpha+1))/2;
190 
191  // update wwp and ccp
192  wwp = w - wp;
193  ccp = c - cp;
194 
195  // update objectives
196  objective_p = objective;
197  objective = fun_x;
198 
199  // regularize objective with tree norm
    // adds lambda * sum_m ||w_m|| (Eigen's .norm() -- Euclidean per column);
    // used only for the relative-change stopping test below
200  double L1q_norm = 0.0;
201  for (int m=0; m<n_classes; m++)
202  L1q_norm += w.col(m).norm();
203  objective += lambda*L1q_norm;
204 
205  //cout << "Objective = " << objective << endl;
206 
207  // check for termination of whole process
    // relative objective change below tolerance; iter>2 skips the
    // meaningless comparison against the zero-initialized objective
208  if ((CMath::abs(objective - objective_p) < options.tolerance*CMath::abs(objective_p)) && (iter>2))
209  {
210  SG_SINFO("Objective changes less than tolerance\n")
211  done = true;
212  }
213 
214  iter++;
215  }
216  SG_SINFO("%d iterations passed, objective = %f\n",iter,objective)
217  //internal::set_is_malloc_allowed(true);
218 
219  // output computed weight vectors and intercepts
    // copy Eigen results element-wise into shogun containers for the caller
220  SGMatrix<float64_t> r_w(n_feats,n_classes);
221  for (j=0; j<n_classes; j++)
222  {
223  for (i=0; i<n_feats; i++)
224  r_w(i,j) = w(i,j);
225  }
226  //r_w.display_matrix();
227  SGVector<float64_t> r_c(n_classes);
228  for (j=0; j<n_classes; j++)
229  r_c[j] = c[j];
230  return slep_result_t(r_w, r_c);
231 };
232 };
233 #endif

SHOGUN Machine Learning Toolbox - Documentation