LibLinearMTL.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Christian Widmer
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2012 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <vector>

#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinearMTL.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CLibLinearMTL::CLibLinearMTL()
: CLinearMachine()
{
    init();
}

CLibLinearMTL::CLibLinearMTL(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
}

void CLibLinearMTL::init()
{
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
}

CLibLinearMTL::~CLibLinearMTL()
{
}

bool CLibLinearMTL::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(m_labels)

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n")

        set_features((CDotFeatures*) data);
    }
    ASSERT(features)
    m_labels->ensure_valid();

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (num_vec!=num_train_labels)
    {
        SG_ERROR("number of vectors %d does not match "
                "number of training labels %d\n",
                num_vec, num_train_labels);
    }

    float64_t* training_w = NULL;
    if (use_bias)
        training_w=SG_MALLOC(float64_t, num_feat+1);
    else
        training_w=SG_MALLOC(float64_t, num_feat+0);

    liblinear_problem prob;
    if (use_bias)
    {
        prob.n=num_feat+1;
        memset(training_w, 0, sizeof(float64_t)*(num_feat+1));
    }
    else
    {
        prob.n=num_feat;
        memset(training_w, 0, sizeof(float64_t)*(num_feat+0));
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(float64_t, prob.l);
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=((CBinaryLabels*)m_labels)->get_label(i);

    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob.l;i++)
    {
        if(prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n)
    SG_INFO("%d positives, %d negatives\n", pos, neg)

    double Cp=C1;
    double Cn=C2;
    solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn);

    if (use_bias)
        set_bias(training_w[num_feat]);
    else
        set_bias(0);

    SG_FREE(prob.y);

    SGVector<float64_t> w(num_feat);
    for (int32_t i=0; i<num_feat; i++)
        w[i] = training_w[i];
    set_w(w);

    // training_w was allocated with SG_MALLOC above and is no longer needed
    SG_FREE(training_w);

    return true;
}
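
/* Usage sketch (illustrative only, not part of the original file): trains a
   multitask model on toy data. It assumes the task-structure setters declared
   in LibLinearMTL.h (set_num_tasks, set_task_indicator_lhs,
   set_task_similarity_matrix); the objects feats, labels, task_indicator and
   M are hypothetical.

    CLibLinearMTL* mtl = new CLibLinearMTL(1.0, feats, labels);
    mtl->set_num_tasks(num_tasks);
    mtl->set_task_indicator_lhs(task_indicator); // one task id per example
    mtl->set_task_similarity_matrix(M);          // sparse task coupling
    mtl->train();
    SGMatrix<float64_t> W = mtl->get_W();        // per-task weight vectors
*/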

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//              upper_bound_i = Cp if y_i = 1
//              upper_bound_i = Cn if y_i = -1
//              D_ii = 0
// In L2-SVM case:
//              upper_bound_i = INF
//              D_ii = 1/(2*Cp) if y_i = 1
//              D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

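// Per-coordinate update used below (the standard LIBLINEAR dual CD step,
// restated here for reference): with the partial gradient
//   G_i = y_i w^T x_i - 1 + D_ii alpha_i
// and QD_i = x_i^T x_i + D_ii, each step is the box-constrained Newton update
//   alpha_i <- min(max(alpha_i - G_i/QD_i, 0), upper_bound_i)
// followed by the rank-one update w <- w + (alpha_i_new - alpha_i_old) y_i x_i.
// In the MTL variant below, w^T x_i is replaced by a task-similarity-weighted
// sum over the per-task vectors stored in the columns of V.
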
#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)
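// GETI(i) maps the label y[i] in {-1,+1} to an index in {0,2}, so the diag[]
// and upper_bound[] arrays below keep the y=-1 parameters in slot 0 and the
// y=+1 parameters in slot 2 (slot 1 is unused).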

void CLibLinearMTL::solve_l2r_l1l2_svc(const liblinear_problem *prob, double eps, double Cp, double Cn)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);

    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;
    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // matrix V of per-task weight vectors (one column per task)
    V = SGMatrix<float64_t>(w_size,num_tasks);

    // save alphas
    alphas = SGVector<float64_t>(l);

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
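    // the branch below hard-codes the L1-loss case (L2R_L1LOSS_SVC_DUAL):
    // D_ii = 0 and the box constraints Cp/Cn become active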
    if(true)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    // set V to zero
    for(int32_t k=0; k<w_size*num_tasks; k++)
    {
        V.matrix[k] = 0;
    }

    // init alphas
    for(i=0; i<l; i++)
    {
        alphas[i] = 0;
    }

    for(i=0; i<l; i++)
    {
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
        QD[i] = diag[GETI(i)];
        QD[i] += prob->x->dot(i, prob->x,i);
        index[i] = i;
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = CMath::random(i, active_size-1);
            CMath::swap(index[i], index[j]);
        }

        for (s=0;s<active_size;s++)
        {
            i = index[s];
            int32_t yi = y[i];
            int32_t ti = task_indicator_lhs[i];
            C = upper_bound[GETI(i)];

            // we compute the inner sum by looping over tasks
            // this update is the main result of MTL_DCD
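            // gradient of the dual objective w.r.t. alpha_i:
            //   G_i = y_i * sum_s M[t_i,s] * (V[:,s]^T x_i) - 1
            // where t_i is the task of example i, M the (sparse) task
            // similarity matrix and V[:,s] = sum_{j: t_j=s} alpha_j y_j x_j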
            typedef std::map<index_t, float64_t>::const_iterator map_iter;

            float64_t inner_sum = 0;
            for (map_iter it=task_similarity_matrix.data[ti].begin(); it!=task_similarity_matrix.data[ti].end(); it++)
            {
                // get data from sparse matrix
                int32_t e_i = it->first;
                float64_t sim = it->second;

                // fetch vector
                float64_t* tmp_w = V.get_column_vector(e_i);
                inner_sum += sim * yi * prob->x->dense_dot(i, tmp_w, n);

                //possibly deal with bias
                //if (prob->use_bias)
                //  G+=w[n];
            }

            // compute gradient
            G = inner_sum-1.0;

            // check if point can be removed from active set
            PG = 0;
            if (alphas[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alphas[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                // save previous alpha
                double alpha_old = alphas[i];

                // project onto feasible set
                alphas[i] = CMath::min(CMath::max(alphas[i] - G/QD[i], 0.0), C);
                d = (alphas[i] - alpha_old)*yi;

                // update corresponding weight vector
                float64_t* tmp_w = V.get_column_vector(ti);
                prob->x->add_to_dense_vec(d, i, tmp_w, n);

                //if (prob->use_bias)
                //  w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6)

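        // stop when the projected-gradient gap over the active set is small;
        // if the set was shrunk, re-check optimality over all l points first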
        if(gap <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE()
    SG_INFO("optimization finished, #iter = %d\n",iter)
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster"
                " (also see liblinear FAQ)\n\n");
    }

    // buffers were allocated with SG_MALLOC, so release them with SG_FREE
    SG_FREE(QD);
    SG_FREE(y);
    SG_FREE(index);
}


float64_t CLibLinearMTL::compute_primal_obj()
{
    /* python prototype
    num_param = param.shape[0]
    num_dim = len(all_xt[0])
    num_tasks = int(num_param / num_dim)
    num_examples = len(all_xt)

    # vector to matrix
    W = param.reshape(num_tasks, num_dim)

    obj = 0

    reg_obj = 0
    loss_obj = 0

    assert len(all_xt) == len(all_lt) == len(task_indicator)

    # L2 regularizer
    for t in xrange(num_tasks):
        reg_obj += 0.5 * np.dot(W[t,:], W[t,:])

    # MTL regularizer
    for s in xrange(num_tasks):
        for t in xrange(num_tasks):
            reg_obj += 0.5 * L[s,t] * np.dot(W[s,:], W[t,:])

    # loss
    for i in xrange(num_examples):
        ti = task_indicator[i]
        t = all_lt[i] * np.dot(W[ti,:], all_xt[i])
        # hinge
        loss_obj += max(0, 1 - t)

    # combine to final objective
    obj = reg_obj + C * loss_obj

    return obj
    */

    SG_INFO("starting to compute PRIMAL OBJ\n")
    // calculate objective value
    SGMatrix<float64_t> W = get_W();

    float64_t obj = 0;
    int32_t num_vec = features->get_num_vectors();
    int32_t w_size = features->get_dim_feature_space();

    // L2 regularizer
    for (int32_t t=0; t<num_tasks; t++)
    {
        float64_t* w_t = W.get_column_vector(t);

        for(int32_t i=0; i<w_size; i++)
        {
            obj += 0.5 * w_t[i]*w_t[i];
        }
    }

    // MTL regularizer
    for (int32_t s=0; s<num_tasks; s++)
    {
        float64_t* w_s = W.get_column_vector(s);
        for (int32_t t=0; t<num_tasks; t++)
        {
            float64_t* w_t = W.get_column_vector(t);
            float64_t l = graph_laplacian.matrix[s*num_tasks+t];

            for(int32_t i=0; i<w_size; i++)
            {
                obj += 0.5 * l * w_s[i]*w_t[i];
            }
        }
    }

    // loss
    for(int32_t i=0; i<num_vec; i++)
    {
        int32_t ti = task_indicator_lhs[i];
        float64_t* w_t = W.get_column_vector(ti);
        float64_t residual = ((CBinaryLabels*)m_labels)->get_label(i) * features->dense_dot(i, w_t, w_size);

        // hinge loss
        obj += C1 * CMath::max(0.0, 1 - residual);
    }

    SG_INFO("DONE computing PRIMAL OBJ, obj=%f\n",obj)

    return obj;
}

float64_t CLibLinearMTL::compute_dual_obj()
{
    /* python prototype
    num_xt = len(xt)

    # compute quadratic term
    for i in xrange(num_xt):
        for j in xrange(num_xt):

            s = task_indicator[i]
            t = task_indicator[j]

            obj -= 0.5 * M[s,t] * alphas[i] * alphas[j] * lt[i] * lt[j] * np.dot(xt[i], xt[j])

    return obj
    */

    SG_INFO("starting to compute DUAL OBJ\n")

    int32_t num_vec=features->get_num_vectors();

    float64_t obj = 0;

    // compute linear term
    for(int32_t i=0; i<num_vec; i++)
    {
        obj += alphas[i];
    }

    // compute quadratic term
    int32_t v_size = features->get_dim_feature_space();

    // efficient computation
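    // identity used here: since V[:,s] = sum_{i: t_i=s} alpha_i y_i x_i,
    //   sum_{i,j} alpha_i alpha_j y_i y_j M[t_i,t_j] x_i^T x_j
    //     = sum_{s,t} M[s,t] * (V[:,s]^T V[:,t])
    // so the quadratic term needs only O(num_tasks^2) dot products of
    // length v_size instead of O(num_vec^2) feature dot products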
    for (int32_t s=0; s<num_tasks; s++)
    {
        float64_t* v_s = V.get_column_vector(s);
        for (int32_t t=0; t<num_tasks; t++)
        {
            float64_t* v_t = V.get_column_vector(t);
            const float64_t ts = task_similarity_matrix(s, t);

            for(int32_t i=0; i<v_size; i++)
            {
                obj -= 0.5 * ts * v_s[i]*v_t[i];
            }
        }
    }

    /*
    // naive implementation for reference
    float64_t tmp_val2 = 0;

    for(int32_t i=0; i<num_vec; i++)
    {
        int32_t ti_i = task_indicator_lhs[i];
        for(int32_t j=0; j<num_vec; j++)
        {
            // look up task similarity
            int32_t ti_j = task_indicator_lhs[j];

            const float64_t ts = task_similarity_matrix(ti_i, ti_j);

            // compute objective
            tmp_val2 -= 0.5 * alphas[i] * alphas[j] * ts * ((CBinaryLabels*)m_labels)->get_label(i) *
                ((CBinaryLabels*)m_labels)->get_label(j) * features->dot(i, features,j);
        }
    }
    */

    return obj;
}


float64_t CLibLinearMTL::compute_duality_gap()
{
    // stub: the duality gap is not computed yet
    return 0.0;
}

#endif //HAVE_LAPACK