SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
slep_mc_plain_lr.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Sergey Lisitsyn
8  * Copyright (C) 2010-2012 Jun Liu, Jieping Ye
9  */
10 
12 #ifdef USE_GPL_SHOGUN
13 
17 #include <shogun/lib/Signal.h>
18 #include <shogun/lib/Time.h>
19 #include <iostream>
20 
21 using namespace shogun;
22 using namespace Eigen;
23 using namespace std;
24 
25 namespace shogun
26 {
27 
28 slep_result_t slep_mc_plain_lr(
29  CDotFeatures* features,
30  CMulticlassLabels* labels,
31  float64_t z,
32  const slep_options& options)
33 {
34  int i,j;
35  // obtain problem parameters
36  int n_feats = features->get_dim_feature_space();
37  int n_vecs = features->get_num_vectors();
38  int n_classes = labels->get_num_classes();
39 
40  // labels vector containing values in range (0 .. n_classes)
41  SGVector<float64_t> labels_vector = labels->get_labels();
42 
43  // initialize matrices and vectors to be used
44  // weight vector
45  MatrixXd w = MatrixXd::Zero(n_feats, n_classes);
46  // intercepts (biases)
47  VectorXd c = VectorXd::Zero(n_classes);
48 
49  if (options.last_result)
50  {
51  SGMatrix<float64_t> last_w = options.last_result->w;
52  SGVector<float64_t> last_c = options.last_result->c;
53  for (i=0; i<n_classes; i++)
54  {
55  c[i] = last_c[i];
56  for (j=0; j<n_feats; j++)
57  w(j,i) = last_w(j,i);
58  }
59  }
60  // iterative process matrices and vectors
61  MatrixXd wp = w, wwp = MatrixXd::Zero(n_feats, n_classes);
62  VectorXd cp = c, ccp = VectorXd::Zero(n_classes);
63  // search point weight vector
64  MatrixXd search_w = MatrixXd::Zero(n_feats, n_classes);
65  // search point intercepts
66  VectorXd search_c = VectorXd::Zero(n_classes);
67  // dot products
68  MatrixXd Aw = MatrixXd::Zero(n_vecs, n_classes);
69  for (j=0; j<n_classes; j++)
70  features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);
71  MatrixXd As = MatrixXd::Zero(n_vecs, n_classes);
72  MatrixXd Awp = MatrixXd::Zero(n_vecs, n_classes);
73  // gradients
74  MatrixXd g = MatrixXd::Zero(n_feats, n_classes);
75  VectorXd gc = VectorXd::Zero(n_classes);
76  // projection
77  MatrixXd v = MatrixXd::Zero(n_feats, n_classes);
78 
79  // Lipschitz continuous gradient parameter for line search
80  double L = 1.0/(n_vecs*n_classes);
81  // coefficients for search point computation
82  double alphap = 0, alpha = 1;
83 
84  // lambda regularization parameter
85  double lambda = z;
86  // objective values
87  double objective = 0.0;
88  double objective_p = 0.0;
89 
90  int iter = 0;
91  bool done = false;
92  CTime time;
93  //internal::set_is_malloc_allowed(false);
94  while ((!done) && (iter<options.max_iter) && (!CSignal::cancel_computations()))
95  {
96  double beta = (alphap-1)/alpha;
97  // compute search points
98  search_w = w + beta*wwp;
99  search_c = c + beta*ccp;
100 
101  // update dot products with search point
102  As = Aw + beta*(Aw-Awp);
103 
104  // compute objective and gradient at search point
105  double fun_s = 0;
106  g.setZero();
107  gc.setZero();
108  // for each vector
109  for (i=0; i<n_vecs; i++)
110  {
111  // class of current vector
112  int vec_class = labels_vector[i];
113  // for each class
114  for (j=0; j<n_classes; j++)
115  {
116  // compute logistic loss
117  double aa = ((vec_class == j) ? -1.0 : 1.0)*(As(i,j) + search_c(j));
118  double bb = aa > 0.0 ? aa : 0.0;
119  // avoid underflow via log-sum-exp trick
120  fun_s += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
121  double prob = 1.0/(1+CMath::exp(aa));
122  double b = ((vec_class == j) ? -1.0 : 1.0)*(1-prob);
123  // update gradient of intercepts
124  gc[j] += b;
125  // update gradient of weight vectors
126  features->add_to_dense_vec(b, i, g.col(j).data(), n_feats);
127  }
128  }
129  //fun_s /= (n_vecs*n_classes);
130 
131  wp = w;
132  Awp = Aw;
133  cp = c;
134 
135  int inner_iter = 0;
136  double fun_x = 0;
137 
138  // line search process
139  while (inner_iter<5000)
140  {
141  // compute line search point
142  v = search_w - g/L;
143  c = search_c - gc/L;
144 
145  // compute projection of gradient
146  eppMatrix(w.data(),v.data(),n_feats,n_classes,lambda/L,options.q);
147 
148  v = w - search_w;
149 
150  // update dot products
151  for (j=0; j<n_classes; j++)
152  features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);
153 
154  // compute objective at search point
155  fun_x = 0;
156  for (i=0; i<n_vecs; i++)
157  {
158  int vec_class = labels_vector[i];
159  for (j=0; j<n_classes; j++)
160  {
161  double aa = ((vec_class == j) ? -1.0 : 1.0)*(Aw(i,j) + c(j));
162  double bb = aa > 0.0 ? aa : 0.0;
163  fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
164  }
165  }
166  //fun_x /= (n_vecs*n_classes);
167 
168  // check for termination of line search
169  double r_sum = (v.squaredNorm() + (c-search_c).squaredNorm())/2;
170  double l_sum = fun_x - fun_s - v.cwiseProduct(g).sum() - (c-search_c).dot(gc);
171 
172  // stop if projected gradient is less than 1e-20
173  if (r_sum <= 1e-20)
174  {
175  SG_SINFO("Gradient step makes little improvement (%f)\n",r_sum)
176  done = true;
177  break;
178  }
179 
180  if (l_sum <= r_sum*L)
181  break;
182  else
183  L = CMath::max(2*L, l_sum/r_sum);
184 
185  inner_iter++;
186  }
187 
188  // update alpha coefficients
189  alphap = alpha;
190  alpha = (1+CMath::sqrt(4*alpha*alpha+1))/2;
191 
192  // update wwp and ccp
193  wwp = w - wp;
194  ccp = c - cp;
195 
196  // update objectives
197  objective_p = objective;
198  objective = fun_x;
199 
200  // regularize objective with tree norm
201  double L1q_norm = 0.0;
202  for (int m=0; m<n_classes; m++)
203  L1q_norm += w.col(m).norm();
204  objective += lambda*L1q_norm;
205 
206  //cout << "Objective = " << objective << endl;
207 
208  // check for termination of whole process
209  if ((CMath::abs(objective - objective_p) < options.tolerance*CMath::abs(objective_p)) && (iter>2))
210  {
211  SG_SINFO("Objective changes less than tolerance\n")
212  done = true;
213  }
214 
215  iter++;
216  }
217  SG_SINFO("%d iterations passed, objective = %f\n",iter,objective)
218  //internal::set_is_malloc_allowed(true);
219 
220  // output computed weight vectors and intercepts
221  SGMatrix<float64_t> r_w(n_feats,n_classes);
222  for (j=0; j<n_classes; j++)
223  {
224  for (i=0; i<n_feats; i++)
225  r_w(i,j) = w(i,j);
226  }
227  //r_w.display_matrix();
228  SGVector<float64_t> r_c(n_classes);
229  for (j=0; j<n_classes; j++)
230  r_c[j] = c[j];
231  return slep_result_t(r_w, r_c);
232 };
233 };
234 
235 #endif //USE_GPL_SHOGUN
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:47
virtual void dense_dot_range(float64_t *output, int32_t start, int32_t stop, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b)
Definition: DotFeatures.cpp:67
Vector::Scalar dot(Vector a, Vector b)
Definition: Redux.h:58
Definition: SGMatrix.h:20
virtual int32_t get_num_vectors() const =0
Definition: basetag.h:132
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)=0
Features that support dot products among other operations.
Definition: DotFeatures.h:44
virtual int32_t get_dim_feature_space() const =0
SGVector< float64_t > get_labels()
Definition: DenseLabels.cpp:82
Multiclass Labels for multi-class classification.
double float64_t
Definition: common.h:50
static bool cancel_computations()
Definition: Signal.h:86
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
static float64_t exp(float64_t x)
Definition: Math.h:621
#define SG_SINFO(...)
Definition: SGIO.h:173
static float64_t log(float64_t v)
Definition: Math.h:922
Class which collects generic mathematical functions.
Definition: Math.h:134
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68

SHOGUN Machine Learning Toolbox - Documentation