SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LibLinearMTL.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011-2012 Christian Widmer
8  * Written (W) 2007-2010 Soeren Sonnenburg
9  * Copyright (c) 2007-2009 The LIBLINEAR Project.
10  * Copyright (C) 2007-2012 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
13 #include <vector>
14 
15 #include <shogun/lib/config.h>
16 
17 #ifdef HAVE_LAPACK
18 #include <shogun/io/SGIO.h>
19 #include <shogun/lib/Signal.h>
20 #include <shogun/lib/Time.h>
21 #include <shogun/base/Parameter.h>
25 
26 using namespace shogun;
27 
28 
31 {
32  init();
33 }
34 
36  float64_t C, CDotFeatures* traindat, CLabels* trainlab)
38 {
39  init();
40  C1=C;
41  C2=C;
42  use_bias=true;
43 
44  set_features(traindat);
45  set_labels(trainlab);
46 
47 }
48 
49 
50 void CLibLinearMTL::init()
51 {
52  use_bias=false;
53  C1=1;
54  C2=1;
56  epsilon=1e-5;
57 
58  SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
59  SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
60  SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
62  SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
63  SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
65 
66 }
67 
69 {
70 }
71 
73 {
76 
77  if (data)
78  {
79  if (!data->has_property(FP_DOT))
80  SG_ERROR("Specified features are not of type CDotFeatures\n")
81 
82  set_features((CDotFeatures*) data);
83  }
86 
87 
88  int32_t num_train_labels=m_labels->get_num_labels();
89  int32_t num_feat=features->get_dim_feature_space();
90  int32_t num_vec=features->get_num_vectors();
91 
92  if (num_vec!=num_train_labels)
93  {
94  SG_ERROR("number of vectors %d does not match "
95  "number of training labels %d\n",
96  num_vec, num_train_labels);
97  }
98 
99 
100  float64_t* training_w = NULL;
101  if (use_bias)
102  training_w=SG_MALLOC(float64_t, num_feat+1);
103  else
104  training_w=SG_MALLOC(float64_t, num_feat+0);
105 
106  liblinear_problem prob;
107  if (use_bias)
108  {
109  prob.n=num_feat+1;
110  memset(training_w, 0, sizeof(float64_t)*(num_feat+1));
111  }
112  else
113  {
114  prob.n=num_feat;
115  memset(training_w, 0, sizeof(float64_t)*(num_feat+0));
116  }
117  prob.l=num_vec;
118  prob.x=features;
119  prob.y=SG_MALLOC(float64_t, prob.l);
120  prob.use_bias=use_bias;
121 
122  for (int32_t i=0; i<prob.l; i++)
123  prob.y[i]=((CBinaryLabels*)m_labels)->get_label(i);
124 
125  int pos = 0;
126  int neg = 0;
127  for(int i=0;i<prob.l;i++)
128  {
129  if(prob.y[i]==+1)
130  pos++;
131  }
132  neg = prob.l - pos;
133 
134  SG_INFO("%d training points %d dims\n", prob.l, prob.n)
135  SG_INFO("%d positives, %d negatives\n", pos, neg)
136 
137  double Cp=C1;
138  double Cn=C2;
139  solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn);
140 
141  if (use_bias)
142  set_bias(training_w[num_feat]);
143  else
144  set_bias(0);
145 
146  SG_FREE(prob.y);
147 
148  w = SGVector<float64_t>(num_feat);
149  for (int32_t i=0; i<num_feat; i++)
150  w[i] = training_w[i];
151 
152  return true;
153 }
154 
155 // A coordinate descent algorithm for
156 // L1-loss and L2-loss SVM dual problems
157 //
158 // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
159 // s.t. 0 <= alpha_i <= upper_bound_i,
160 //
161 // where Qij = yi yj xi^T xj and
162 // D is a diagonal matrix
163 //
164 // In L1-SVM case:
165 // upper_bound_i = Cp if y_i = 1
166 // upper_bound_i = Cn if y_i = -1
167 // D_ii = 0
168 // In L2-SVM case:
169 // upper_bound_i = INF
170 // D_ii = 1/(2*Cp) if y_i = 1
171 // D_ii = 1/(2*Cn) if y_i = -1
172 //
173 // Given:
174 // x, y, Cp, Cn
175 // eps is the stopping tolerance
176 //
177 // solution will be put in w
178 
179 #undef GETI
180 #define GETI(i) (y[i]+1)
181 // To support weights for instances, use GETI(i) (i)
182 
183 
184 void CLibLinearMTL::solve_l2r_l1l2_svc(const liblinear_problem *prob, double eps, double Cp, double Cn)
185 {
186 
187 
188 
189  int l = prob->l;
190  int w_size = prob->n;
191  int i, s, iter = 0;
192  double C, d, G;
193  double *QD = SG_MALLOC(double, l);
194  int *index = SG_MALLOC(int, l);
195  //double *alpha = SG_MALLOC(double, l);
196 
197  int32_t *y = SG_MALLOC(int32_t, l);
198  int active_size = l;
199  // PG: projected gradient, for shrinking and stopping
200  double PG;
201  double PGmax_old = CMath::INFTY;
202  double PGmin_old = -CMath::INFTY;
203  double PGmax_new, PGmin_new;
204 
205  // matrix W
206  V = SGMatrix<float64_t>(w_size,num_tasks);
207 
208  // save alpha
210 
211 
212  // default solver_type: L2R_L2LOSS_SVC_DUAL
213  double diag[3] = {0.5/Cn, 0, 0.5/Cp};
214  double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
215  if(true)
216  {
217  diag[0] = 0;
218  diag[2] = 0;
219  upper_bound[0] = Cn;
220  upper_bound[2] = Cp;
221  }
222 
223  int n = prob->n;
224 
225  if (prob->use_bias)
226  n--;
227 
228  // set V to zero
229  for(int32_t k=0; k<w_size*num_tasks; k++)
230  {
231  V.matrix[k] = 0;
232  }
233 
234  // init alphas
235  for(i=0; i<l; i++)
236  {
237  alphas[i] = 0;
238  }
239 
240  for(i=0; i<l; i++)
241  {
242  if(prob->y[i] > 0)
243  {
244  y[i] = +1;
245  }
246  else
247  {
248  y[i] = -1;
249  }
250  QD[i] = diag[GETI(i)];
251  QD[i] += prob->x->dot(i, prob->x,i);
252  index[i] = i;
253  }
254 
255  CTime start_time;
256  while (iter < max_iterations && !CSignal::cancel_computations())
257  {
258  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
259  break;
260 
261  PGmax_new = -CMath::INFTY;
262  PGmin_new = CMath::INFTY;
263 
264  for (i=0; i<active_size; i++)
265  {
266  int j = CMath::random(i, active_size-1);
267  CMath::swap(index[i], index[j]);
268  }
269 
270  for (s=0;s<active_size;s++)
271  {
272  i = index[s];
273  int32_t yi = y[i];
274  int32_t ti = task_indicator_lhs[i];
275  C = upper_bound[GETI(i)];
276 
277  // we compute the inner sum by looping over tasks
278  // this update is the main result of MTL_DCD
279  typedef std::map<index_t, float64_t>::const_iterator map_iter;
280 
281  float64_t inner_sum = 0;
282  for (map_iter it=task_similarity_matrix.data[ti].begin(); it!=task_similarity_matrix.data[ti].end(); it++)
283  {
284 
285  // get data from sparse matrix
286  int32_t e_i = it->first;
287  float64_t sim = it->second;
288 
289  // fetch vector
290  float64_t* tmp_w = V.get_column_vector(e_i);
291  inner_sum += sim * yi * prob->x->dense_dot(i, tmp_w, n);
292 
293  //possibly deal with bias
294  //if (prob->use_bias)
295  // G+=w[n];
296  }
297 
298  // compute gradient
299  G = inner_sum-1.0;
300 
301  // check if point can be removed from active set
302  PG = 0;
303  if (alphas[i] == 0)
304  {
305  if (G > PGmax_old)
306  {
307  active_size--;
308  CMath::swap(index[s], index[active_size]);
309  s--;
310  continue;
311  }
312  else if (G < 0)
313  PG = G;
314  }
315  else if (alphas[i] == C)
316  {
317  if (G < PGmin_old)
318  {
319  active_size--;
320  CMath::swap(index[s], index[active_size]);
321  s--;
322  continue;
323  }
324  else if (G > 0)
325  PG = G;
326  }
327  else
328  PG = G;
329 
330  PGmax_new = CMath::max(PGmax_new, PG);
331  PGmin_new = CMath::min(PGmin_new, PG);
332 
333  if(fabs(PG) > 1.0e-12)
334  {
335  // save previous alpha
336  double alpha_old = alphas[i];
337 
338  // project onto feasible set
339  alphas[i] = CMath::min(CMath::max(alphas[i] - G/QD[i], 0.0), C);
340  d = (alphas[i] - alpha_old)*yi;
341 
342  // update corresponding weight vector
343  float64_t* tmp_w = V.get_column_vector(ti);
344  prob->x->add_to_dense_vec(d, i, tmp_w, n);
345 
346 
347  //if (prob->use_bias)
348  // w[n]+=d;
349  }
350  }
351 
352  iter++;
353  float64_t gap=PGmax_new - PGmin_new;
354  SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6)
355 
356  if(gap <= eps)
357  {
358  if(active_size == l)
359  break;
360  else
361  {
362  active_size = l;
363  PGmax_old = CMath::INFTY;
364  PGmin_old = -CMath::INFTY;
365  continue;
366  }
367  }
368  PGmax_old = PGmax_new;
369  PGmin_old = PGmin_new;
370  if (PGmax_old <= 0)
371  PGmax_old = CMath::INFTY;
372  if (PGmin_old >= 0)
373  PGmin_old = -CMath::INFTY;
374  }
375 
376  SG_DONE()
377  SG_INFO("optimization finished, #iter = %d\n",iter)
378  if (iter >= max_iterations)
379  {
380  SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster"
381  "(also see liblinear FAQ)\n\n");
382  }
383 
384 
385 
386  delete [] QD;
387  //delete [] alpha;
388  delete [] y;
389  delete [] index;
390 }
391 
392 
394 {
395  /* python protype
396  num_param = param.shape[0]
397  num_dim = len(all_xt[0])
398  num_tasks = int(num_param / num_dim)
399  num_examples = len(all_xt)
400 
401 # vector to matrix
402 W = param.reshape(num_tasks, num_dim)
403 
404 obj = 0
405 
406 reg_obj = 0
407 loss_obj = 0
408 
409 assert len(all_xt) == len(all_xt) == len(task_indicator)
410 
411 # L2 regularizer
412 for t in xrange(num_tasks):
413 reg_obj += 0.5 * np.dot(W[t,:], W[t,:])
414 
415 # MTL regularizer
416 for s in xrange(num_tasks):
417 for t in xrange(num_tasks):
418 reg_obj += 0.5 * L[s,t] * np.dot(W[s,:], W[t,:])
419 
420 # loss
421 for i in xrange(num_examples):
422 ti = task_indicator[i]
423 t = all_lt[i] * np.dot(W[ti,:], all_xt[i])
424 # hinge
425 loss_obj += max(0, 1 - t)
426 
427 
428 # combine to final objective
429 obj = reg_obj + C * loss_obj
430 
431 
432 return obj
433 */
434 
435  SG_INFO("DONE to compute Primal OBJ\n")
436  // calculate objective value
438 
439  float64_t obj = 0;
440  int32_t num_vec = features->get_num_vectors();
441  int32_t w_size = features->get_dim_feature_space();
442 
443  // L2 regularizer
444  for (int32_t t=0; t<num_tasks; t++)
445  {
446  float64_t* w_t = W.get_column_vector(t);
447 
448  for(int32_t i=0; i<w_size; i++)
449  {
450  obj += 0.5 * w_t[i]*w_t[i];
451  }
452  }
453 
454  // MTL regularizer
455  for (int32_t s=0; s<num_tasks; s++)
456  {
457  float64_t* w_s = W.get_column_vector(s);
458  for (int32_t t=0; t<num_tasks; t++)
459  {
460  float64_t* w_t = W.get_column_vector(t);
461  float64_t l = graph_laplacian.matrix[s*num_tasks+t];
462 
463  for(int32_t i=0; i<w_size; i++)
464  {
465  obj += 0.5 * l * w_s[i]*w_t[i];
466  }
467  }
468  }
469 
470  // loss
471  for(int32_t i=0; i<num_vec; i++)
472  {
473  int32_t ti = task_indicator_lhs[i];
474  float64_t* w_t = W.get_column_vector(ti);
475  float64_t residual = ((CBinaryLabels*)m_labels)->get_label(i) * features->dense_dot(i, w_t, w_size);
476 
477  // hinge loss
478  obj += C1 * CMath::max(0.0, 1 - residual);
479 
480  }
481 
482  SG_INFO("DONE to compute Primal OBJ, obj=%f\n",obj)
483 
484  return obj;
485 }
486 
488 {
489  /* python prototype
490  num_xt = len(xt)
491 
492 # compute quadratic term
493 for i in xrange(num_xt):
494 for j in xrange(num_xt):
495 
496 s = task_indicator[i]
497 t = task_indicator[j]
498 
499 obj -= 0.5 * M[s,t] * alphas[i] * alphas[j] * lt[i] * lt[j] * np.dot(xt[i], xt[j])
500 
501 return obj
502 */
503 
504  SG_INFO("starting to compute DUAL OBJ\n")
505 
506  int32_t num_vec=features->get_num_vectors();
507 
508  float64_t obj = 0;
509 
510  // compute linear term
511  for(int32_t i=0; i<num_vec; i++)
512  {
513  obj += alphas[i];
514  }
515 
516  // compute quadratic term
517 
518  int32_t v_size = features->get_dim_feature_space();
519 
520  // efficient computation
521  for (int32_t s=0; s<num_tasks; s++)
522  {
523  float64_t* v_s = V.get_column_vector(s);
524  for (int32_t t=0; t<num_tasks; t++)
525  {
526  float64_t* v_t = V.get_column_vector(t);
527  const float64_t ts = task_similarity_matrix(s, t);
528 
529  for(int32_t i=0; i<v_size; i++)
530  {
531  obj -= 0.5 * ts * v_s[i]*v_t[i];
532  }
533  }
534  }
535 
536  /*
537  // naiive implementation
538  float64_t tmp_val2 = 0;
539 
540  for(int32_t i=0; i<num_vec; i++)
541  {
542  int32_t ti_i = task_indicator_lhs[i];
543  for(int32_t j=0; j<num_vec; j++)
544  {
545  // look up task similarity
546  int32_t ti_j = task_indicator_lhs[j];
547 
548  const float64_t ts = task_similarity_matrix(ti_i, ti_j);
549 
550  // compute objective
551  tmp_val2 -= 0.5 * alphas[i] * alphas[j] * ts * ((CBinaryLabels*)m_labels)->get_label(i) *
552  ((CBinaryLabels*)m_labels)->get_label(j) * features->dot(i, features,j);
553  }
554  }
555  */
556 
557 
558  return obj;
559 }
560 
561 
563 {
564  return 0.0;
565 }
566 
567 
568 #endif //HAVE_LAPACK

SHOGUN Machine Learning Toolbox - Documentation