SHOGUN 6.0.0
LibLinear.cpp
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2010 Soeren Sonnenburg
8  * Copyright (c) 2007-2009 The LIBLINEAR Project.
9  * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 #include <shogun/lib/config.h>
12 
13 #include <shogun/io/SGIO.h>
14 #include <shogun/lib/Signal.h>
15 #include <shogun/lib/Time.h>
16 #include <shogun/base/Parameter.h>
17 #include <shogun/classifier/svm/LibLinear.h>
18 #include <shogun/optimization/liblinear/tron.h>
19 #include <shogun/features/DotFeatures.h>
20 #include <shogun/labels/BinaryLabels.h>
21 
22 using namespace shogun;
23 
24 CLibLinear::CLibLinear()
25 : CLinearMachine()
26 {
27  init();
28 }
29 
30 CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
31 : CLinearMachine()
32 {
33  init();
34  set_liblinear_solver_type(l);
35 }
36 
37 CLibLinear::CLibLinear(
38  float64_t C, CDotFeatures* traindat, CLabels* trainlab)
39 : CLinearMachine()
40 {
41  init();
42  C1=C;
43  C2=C;
44  use_bias=true;
45 
46  set_features(traindat);
47  set_labels(trainlab);
48  set_liblinear_solver_type(L2R_L1LOSS_SVC_DUAL);
49 }
50 
51 void CLibLinear::init()
52 {
53  liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
54  use_bias=false;
55  C1=1;
56  C2=1;
57  set_max_iterations();
58  epsilon=1e-5;
60  set_compute_bias(false);
61 
62  SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
63  SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
64  SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
65  MS_NOT_AVAILABLE);
66  SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
67  SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
68  MS_NOT_AVAILABLE);
69  SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
70  SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
71  "Type of LibLinear solver.", MS_NOT_AVAILABLE);
72 }
73 
74 CLibLinear::~CLibLinear()
75 {
76 }
77 
78 bool CLibLinear::train_machine(CFeatures* data)
79 {
80  CSignal::clear_cancel();
81  ASSERT(m_labels)
82  ASSERT(m_labels->get_label_type() == LT_BINARY)
83 
84  if (data)
85  {
86  if (!data->has_property(FP_DOT))
87  SG_ERROR("Specified features are not of type CDotFeatures\n")
88 
89  set_features((CDotFeatures*) data);
90  }
91  ASSERT(features)
92 
93 
94  int32_t num_train_labels=m_labels->get_num_labels();
95  int32_t num_feat=features->get_dim_feature_space();
96  int32_t num_vec=features->get_num_vectors();
97 
98  if (liblinear_solver_type == L1R_L2LOSS_SVC ||
99  (liblinear_solver_type == L1R_LR) )
100  {
101  if (num_feat!=num_train_labels)
102  {
103  SG_ERROR("L1 methods require the data to be transposed: "
104  "number of features %d does not match number of "
105  "training labels %d\n",
106  num_feat, num_train_labels);
107  }
108  CMath::swap(num_feat, num_vec);
109 
110  }
111  else
112  {
113  if (num_vec!=num_train_labels)
114  {
115  SG_ERROR("number of vectors %d does not match "
116  "number of training labels %d\n",
117  num_vec, num_train_labels);
118  }
119  }
121  if (use_bias)
122  w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
123  else
124  w=SGVector<float64_t>(num_feat);
125 
126  liblinear_problem prob;
127  if (use_bias)
128  {
129  if (liblinear_solver_type == L1R_L2LOSS_SVC || liblinear_solver_type == L1R_LR)
130  prob.n=w.vlen+1;
131  else
132  prob.n=w.vlen;
133  memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
134  }
135  else
136  {
137  prob.n=w.vlen;
138  memset(w.vector, 0, sizeof(float64_t)*(w.vlen+0));
139  }
140  prob.l=num_vec;
141  prob.x=features;
142  prob.y=SG_MALLOC(double, prob.l);
143  float64_t* Cs=SG_MALLOC(double, prob.l);
144  prob.use_bias=use_bias;
145  double Cp=C1;
146  double Cn=C2;
147 
148  for (int32_t i=0; i<prob.l; i++)
149  {
150  prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
151  if (prob.y[i] == +1)
152  Cs[i]=C1;
153  else if (prob.y[i] == -1)
154  Cs[i]=C2;
155  else
156  SG_ERROR("labels should be +1/-1 only\n")
157  }
158 
159  int pos = 0;
160  int neg = 0;
161  for(int i=0;i<prob.l;i++)
162  {
163  if(prob.y[i]==+1)
164  pos++;
165  }
166  neg = prob.l - pos;
167 
168  SG_INFO("%d training points %d dims\n", prob.l, prob.n)
169 
170  function *fun_obj=NULL;
171  switch (liblinear_solver_type)
172  {
173  case L2R_LR:
174  {
175  fun_obj=new l2r_lr_fun(&prob, Cs);
176  CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
177  SG_DEBUG("starting L2R_LR training via tron\n")
178  tron_obj.tron(w.vector, m_max_train_time);
179  SG_DEBUG("done with tron\n")
180  delete fun_obj;
181  break;
182  }
183  case L2R_L2LOSS_SVC:
184  {
185  fun_obj=new l2r_l2_svc_fun(&prob, Cs);
186  CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
187  tron_obj.tron(w.vector, m_max_train_time);
188  delete fun_obj;
189  break;
190  }
191  case L2R_L2LOSS_SVC_DUAL:
192  solve_l2r_l1l2_svc(w, &prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
193  break;
194  case L2R_L1LOSS_SVC_DUAL:
195  solve_l2r_l1l2_svc(w, &prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
196  break;
197  case L1R_L2LOSS_SVC:
198  {
199  //ASSUME FEATURES ARE TRANSPOSED ALREADY
200  solve_l1r_l2_svc(w, &prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
201  break;
202  }
203  case L1R_LR:
204  {
205  //ASSUME FEATURES ARE TRANSPOSED ALREADY
206  solve_l1r_lr(w, &prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
207  break;
208  }
209  case L2R_LR_DUAL:
210  {
211  solve_l2r_lr_dual(w, &prob, epsilon, Cp, Cn);
212  break;
213  }
214  default:
215  SG_ERROR("Error: unknown solver_type\n")
216  break;
217  }
218 
219  set_w(w);
220 
221  if (use_bias)
222  set_bias(w[w.vlen]);
223  else
224  set_bias(0);
225 
226  SG_FREE(prob.y);
227  SG_FREE(Cs);
228 
229  return true;
230 }
231 
232 // A coordinate descent algorithm for
233 // L1-loss and L2-loss SVM dual problems
234 //
235 // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
236 // s.t. 0 <= alpha_i <= upper_bound_i,
237 //
238 // where Qij = yi yj xi^T xj and
239 // D is a diagonal matrix
240 //
241 // In L1-SVM case:
242 // upper_bound_i = Cp if y_i = 1
243 // upper_bound_i = Cn if y_i = -1
244 // D_ii = 0
245 // In L2-SVM case:
246 // upper_bound_i = INF
247 // D_ii = 1/(2*Cp) if y_i = 1
248 // D_ii = 1/(2*Cn) if y_i = -1
249 //
250 // Given:
251 // x, y, Cp, Cn
252 // eps is the stopping tolerance
253 //
254 // solution will be put in w
255 
256 #undef GETI
257 #define GETI(i) (y[i]+1)
258 // To support weights for instances, use GETI(i) (i)
259 
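// Note on GETI: y[i] is +1 or -1, so GETI(i)=y[i]+1 indexes slot 0 (negative class) or
// slot 2 (positive class) of the 3-element arrays diag[] and upper_bound[] below; slot 1
// is unused. Replacing the macro with (i), as noted above, allows per-instance costs.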
260 void CLibLinear::solve_l2r_l1l2_svc(
261  SGVector<float64_t>& w,
262  const liblinear_problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
263 {
264  int l = prob->l;
265  int w_size = prob->n;
266  int i, s, iter = 0;
267  double C, d, G;
268  double *QD = SG_MALLOC(double, l);
269  int *index = SG_MALLOC(int, l);
270  double *alpha = SG_MALLOC(double, l);
271  int32_t *y = SG_MALLOC(int32_t, l);
272  int active_size = l;
273 
274  // PG: projected gradient, for shrinking and stopping
275  double PG;
276  double PGmax_old = CMath::INFTY;
277  double PGmin_old = -CMath::INFTY;
278  double PGmax_new, PGmin_new;
279 
280  // default solver_type: L2R_L2LOSS_SVC_DUAL
281  double diag[3] = {0.5/Cn, 0, 0.5/Cp};
282  double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
283  if(st == L2R_L1LOSS_SVC_DUAL)
284  {
285  diag[0] = 0;
286  diag[2] = 0;
287  upper_bound[0] = Cn;
288  upper_bound[2] = Cp;
289  }
290 
291  int n = prob->n;
292 
293  if (prob->use_bias)
294  n--;
295 
296  for(i=0; i<w_size; i++)
297  w[i] = 0;
298 
299  for(i=0; i<l; i++)
300  {
301  alpha[i] = 0;
302  if(prob->y[i] > 0)
303  {
304  y[i] = +1;
305  }
306  else
307  {
308  y[i] = -1;
309  }
310  QD[i] = diag[GETI(i)];
311 
312  QD[i] += prob->x->dot(i, prob->x,i);
313  index[i] = i;
314  }
315 
316 
317  CTime start_time;
318  while (iter < max_iterations && !CSignal::cancel_computations())
319  {
320  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
321  break;
322 
323  PGmax_new = -CMath::INFTY;
324  PGmin_new = CMath::INFTY;
325 
326  for (i=0; i<active_size; i++)
327  {
328  int j = CMath::random(i, active_size-1);
329  CMath::swap(index[i], index[j]);
330  }
331 
332  for (s=0;s<active_size;s++)
333  {
334  i = index[s];
335  int32_t yi = y[i];
336 
337  G = prob->x->dense_dot(i, w.vector, n);
338  if (prob->use_bias)
339  G+=w.vector[n];
340 
341  if (m_linear_term.vector)
342  G = G*yi + m_linear_term.vector[i];
343  else
344  G = G*yi-1;
345 
346  C = upper_bound[GETI(i)];
347  G += alpha[i]*diag[GETI(i)];
348 
349  PG = 0;
350  if (alpha[i] == 0)
351  {
352  if (G > PGmax_old)
353  {
354  active_size--;
355  CMath::swap(index[s], index[active_size]);
356  s--;
357  continue;
358  }
359  else if (G < 0)
360  PG = G;
361  }
362  else if (alpha[i] == C)
363  {
364  if (G < PGmin_old)
365  {
366  active_size--;
367  CMath::swap(index[s], index[active_size]);
368  s--;
369  continue;
370  }
371  else if (G > 0)
372  PG = G;
373  }
374  else
375  PG = G;
376 
377  PGmax_new = CMath::max(PGmax_new, PG);
378  PGmin_new = CMath::min(PGmin_new, PG);
379 
380  if(fabs(PG) > 1.0e-12)
381  {
382  double alpha_old = alpha[i];
383  alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
384  d = (alpha[i] - alpha_old)*yi;
385 
386  prob->x->add_to_dense_vec(d, i, w.vector, n);
387 
388  if (prob->use_bias)
389  w.vector[n]+=d;
390  }
391  }
392 
393  iter++;
394  float64_t gap=PGmax_new - PGmin_new;
395  SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6)
396 
397  if(gap <= eps)
398  {
399  if(active_size == l)
400  break;
401  else
402  {
403  active_size = l;
404  PGmax_old = CMath::INFTY;
405  PGmin_old = -CMath::INFTY;
406  continue;
407  }
408  }
409  PGmax_old = PGmax_new;
410  PGmin_old = PGmin_new;
411  if (PGmax_old <= 0)
412  PGmax_old = CMath::INFTY;
413  if (PGmin_old >= 0)
414  PGmin_old = -CMath::INFTY;
415  }
416 
417  SG_DONE()
418  SG_INFO("optimization finished, #iter = %d\n",iter)
419  if (iter >= max_iterations)
420  {
421  SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster"
422  "(also see liblinear FAQ)\n\n");
423  }
424 
425  // calculate objective value
426 
427  double v = 0;
428  int nSV = 0;
429  for(i=0; i<w_size; i++)
430  v += w.vector[i]*w.vector[i];
431  for(i=0; i<l; i++)
432  {
433  v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
434  if(alpha[i] > 0)
435  ++nSV;
436  }
437  SG_INFO("Objective value = %lf\n",v/2)
438  SG_INFO("nSV = %d\n",nSV)
439 
440  SG_FREE(QD);
441  SG_FREE(alpha);
442  SG_FREE(y);
443  SG_FREE(index);
444 }
445 
446 // A coordinate descent algorithm for
447 // L1-regularized L2-loss support vector classification
448 //
449 // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
450 //
451 // Given:
452 // x, y, Cp, Cn
453 // eps is the stopping tolerance
454 //
455 // solution will be put in w
456 
457 #undef GETI
458 #define GETI(i) (y[i]+1)
459 // To support weights for instances, use GETI(i) (i)
460 
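// The same GETI trick indexes C[3] = {Cn, 0, Cp} by class below. b[i] caches the residual
// 1 - y_i w^T x_i; each coordinate update takes a Newton step on the smooth loss, combined
// with the L1 term, and is validated by a backtracking line search (sigma, delta, cond).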
461 void CLibLinear::solve_l1r_l2_svc(
462  SGVector<float64_t>& w,
463  liblinear_problem *prob_col, double eps, double Cp, double Cn)
464 {
465  int l = prob_col->l;
466  int w_size = prob_col->n;
467  int j, s, iter = 0;
468  int active_size = w_size;
469  int max_num_linesearch = 20;
470 
471  double sigma = 0.01;
472  double d, G_loss, G, H;
473  double Gmax_old = CMath::INFTY;
474  double Gmax_new;
475  double Gmax_init=0;
476  double d_old, d_diff;
477  double loss_old=0, loss_new;
478  double appxcond, cond;
479 
480  int *index = SG_MALLOC(int, w_size);
481  int32_t *y = SG_MALLOC(int32_t, l);
482  double *b = SG_MALLOC(double, l); // b = 1-ywTx
483  double *xj_sq = SG_MALLOC(double, w_size);
484 
485  CDotFeatures* x = (CDotFeatures*) prob_col->x;
486  void* iterator;
487  int32_t ind;
488  float64_t val;
489 
490  double C[3] = {Cn,0,Cp};
491 
492  int n = prob_col->n;
493  if (prob_col->use_bias)
494  n--;
495 
496  for(j=0; j<l; j++)
497  {
498  b[j] = 1;
499  if(prob_col->y[j] > 0)
500  y[j] = 1;
501  else
502  y[j] = -1;
503  }
504 
505  for(j=0; j<w_size; j++)
506  {
507  w.vector[j] = 0;
508  index[j] = j;
509  xj_sq[j] = 0;
510 
511  if (use_bias && j==n)
512  {
513  for (ind=0; ind<l; ind++)
514  xj_sq[n] += C[GETI(ind)];
515  }
516  else
517  {
518  iterator=x->get_feature_iterator(j);
519  while (x->get_next_feature(ind, val, iterator))
520  xj_sq[j] += C[GETI(ind)]*val*val;
521  x->free_feature_iterator(iterator);
522  }
523  }
524 
525 
526  CTime start_time;
527  while (iter < max_iterations && !CSignal::cancel_computations())
528  {
529  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
530  break;
531 
532  Gmax_new = 0;
533 
534  for(j=0; j<active_size; j++)
535  {
536  int i = CMath::random(j, active_size-1);
537  CMath::swap(index[i], index[j]);
538  }
539 
540  for(s=0; s<active_size; s++)
541  {
542  j = index[s];
543  G_loss = 0;
544  H = 0;
545 
546  if (use_bias && j==n)
547  {
548  for (ind=0; ind<l; ind++)
549  {
550  if(b[ind] > 0)
551  {
552  double tmp = C[GETI(ind)]*y[ind];
553  G_loss -= tmp*b[ind];
554  H += tmp*y[ind];
555  }
556  }
557  }
558  else
559  {
560  iterator=x->get_feature_iterator(j);
561 
562  while (x->get_next_feature(ind, val, iterator))
563  {
564  if(b[ind] > 0)
565  {
566  double tmp = C[GETI(ind)]*val*y[ind];
567  G_loss -= tmp*b[ind];
568  H += tmp*val*y[ind];
569  }
570  }
571  x->free_feature_iterator(iterator);
572  }
573 
574  G_loss *= 2;
575 
576  G = G_loss;
577  H *= 2;
578  H = CMath::max(H, 1e-12);
579 
580  double Gp = G+1;
581  double Gn = G-1;
582  double violation = 0;
583  if(w.vector[j] == 0)
584  {
585  if(Gp < 0)
586  violation = -Gp;
587  else if(Gn > 0)
588  violation = Gn;
589  else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
590  {
591  active_size--;
592  CMath::swap(index[s], index[active_size]);
593  s--;
594  continue;
595  }
596  }
597  else if(w.vector[j] > 0)
598  violation = fabs(Gp);
599  else
600  violation = fabs(Gn);
601 
602  Gmax_new = CMath::max(Gmax_new, violation);
603 
604  // obtain Newton direction d
605  if(Gp <= H*w.vector[j])
606  d = -Gp/H;
607  else if(Gn >= H*w.vector[j])
608  d = -Gn/H;
609  else
610  d = -w.vector[j];
611 
612  if(fabs(d) < 1.0e-12)
613  continue;
614 
615  double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
616  d_old = 0;
617  int num_linesearch;
618  for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
619  {
620  d_diff = d_old - d;
621  cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;
622 
623  appxcond = xj_sq[j]*d*d + G_loss*d + cond;
624  if(appxcond <= 0)
625  {
626  if (use_bias && j==n)
627  {
628  for (ind=0; ind<l; ind++)
629  b[ind] += d_diff*y[ind];
630  break;
631  }
632  else
633  {
634  iterator=x->get_feature_iterator(j);
635  while (x->get_next_feature(ind, val, iterator))
636  b[ind] += d_diff*val*y[ind];
637 
638  x->free_feature_iterator(iterator);
639  break;
640  }
641  }
642 
643  if(num_linesearch == 0)
644  {
645  loss_old = 0;
646  loss_new = 0;
647 
648  if (use_bias && j==n)
649  {
650  for (ind=0; ind<l; ind++)
651  {
652  if(b[ind] > 0)
653  loss_old += C[GETI(ind)]*b[ind]*b[ind];
654  double b_new = b[ind] + d_diff*y[ind];
655  b[ind] = b_new;
656  if(b_new > 0)
657  loss_new += C[GETI(ind)]*b_new*b_new;
658  }
659  }
660  else
661  {
662  iterator=x->get_feature_iterator(j);
663  while (x->get_next_feature(ind, val, iterator))
664  {
665  if(b[ind] > 0)
666  loss_old += C[GETI(ind)]*b[ind]*b[ind];
667  double b_new = b[ind] + d_diff*val*y[ind];
668  b[ind] = b_new;
669  if(b_new > 0)
670  loss_new += C[GETI(ind)]*b_new*b_new;
671  }
672  x->free_feature_iterator(iterator);
673  }
674  }
675  else
676  {
677  loss_new = 0;
678  if (use_bias && j==n)
679  {
680  for (ind=0; ind<l; ind++)
681  {
682  double b_new = b[ind] + d_diff*y[ind];
683  b[ind] = b_new;
684  if(b_new > 0)
685  loss_new += C[GETI(ind)]*b_new*b_new;
686  }
687  }
688  else
689  {
690  iterator=x->get_feature_iterator(j);
691  while (x->get_next_feature(ind, val, iterator))
692  {
693  double b_new = b[ind] + d_diff*val*y[ind];
694  b[ind] = b_new;
695  if(b_new > 0)
696  loss_new += C[GETI(ind)]*b_new*b_new;
697  }
698  x->free_feature_iterator(iterator);
699  }
700  }
701 
702  cond = cond + loss_new - loss_old;
703  if(cond <= 0)
704  break;
705  else
706  {
707  d_old = d;
708  d *= 0.5;
709  delta *= 0.5;
710  }
711  }
712 
713  w.vector[j] += d;
714 
715  // recompute b[] if line search takes too many steps
716  if(num_linesearch >= max_num_linesearch)
717  {
718  SG_INFO("#")
719  for(int i=0; i<l; i++)
720  b[i] = 1;
721 
722  for(int i=0; i<n; i++)
723  {
724  if(w.vector[i]==0)
725  continue;
726 
727  iterator=x->get_feature_iterator(i);
728  while (x->get_next_feature(ind, val, iterator))
729  b[ind] -= w.vector[i]*val*y[ind];
730  x->free_feature_iterator(iterator);
731  }
732 
733  if (use_bias && w.vector[n])
734  {
735  for (ind=0; ind<l; ind++)
736  b[ind] -= w.vector[n]*y[ind];
737  }
738  }
739  }
740 
741  if(iter == 0)
742  Gmax_init = Gmax_new;
743  iter++;
744 
745  SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
746  -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);
747 
748  if(Gmax_new <= eps*Gmax_init)
749  {
750  if(active_size == w_size)
751  break;
752  else
753  {
754  active_size = w_size;
755  Gmax_old = CMath::INFTY;
756  continue;
757  }
758  }
759 
760  Gmax_old = Gmax_new;
761  }
762 
763  SG_DONE()
764  SG_INFO("optimization finished, #iter = %d\n", iter)
765  if(iter >= max_iterations)
766  SG_WARNING("\nWARNING: reaching max number of iterations\n")
767 
768  // calculate objective value
769 
770  double v = 0;
771  int nnz = 0;
772  for(j=0; j<w_size; j++)
773  {
774  if(w.vector[j] != 0)
775  {
776  v += fabs(w.vector[j]);
777  nnz++;
778  }
779  }
780  for(j=0; j<l; j++)
781  if(b[j] > 0)
782  v += C[GETI(j)]*b[j]*b[j];
783 
784  SG_INFO("Objective value = %lf\n", v)
785  SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)
786 
787  SG_FREE(index);
788  SG_FREE(y);
789  SG_FREE(b);
790  SG_FREE(xj_sq);
791 }
792 
793 // A coordinate descent algorithm for
794 // L1-regularized logistic regression problems
795 //
796 // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
797 //
798 // Given:
799 // x, y, Cp, Cn
800 // eps is the stopping tolerance
801 //
802 // solution will be put in w
803 
804 #undef GETI
805 #define GETI(i) (y[i]+1)
806 // To support weights for instances, use GETI(i) (i)
807 
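// exp_wTx[i] caches exp(w^T x_i). xj_max, C_sum, xjneg_sum and xjpos_sum are precomputed
// per feature column; inside the main loop each column j accumulates sum1, sum2 and the
// Hessian term H from this cache, which is updated (or rebuilt) after each line search.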
808 void CLibLinear::solve_l1r_lr(
809  SGVector<float64_t>& w,
810  const liblinear_problem *prob_col, double eps,
811  double Cp, double Cn)
812 {
813  int l = prob_col->l;
814  int w_size = prob_col->n;
815  int j, s, iter = 0;
816  int active_size = w_size;
817  int max_num_linesearch = 20;
818 
819  double x_min = 0;
820  double sigma = 0.01;
821  double d, G, H;
822  double Gmax_old = CMath::INFTY;
823  double Gmax_new;
824  double Gmax_init=0;
825  double sum1, appxcond1;
826  double sum2, appxcond2;
827  double cond;
828 
829  int *index = SG_MALLOC(int, w_size);
830  int32_t *y = SG_MALLOC(int32_t, l);
831  double *exp_wTx = SG_MALLOC(double, l);
832  double *exp_wTx_new = SG_MALLOC(double, l);
833  double *xj_max = SG_MALLOC(double, w_size);
834  double *C_sum = SG_MALLOC(double, w_size);
835  double *xjneg_sum = SG_MALLOC(double, w_size);
836  double *xjpos_sum = SG_MALLOC(double, w_size);
837 
838  CDotFeatures* x = prob_col->x;
839  void* iterator;
840  int ind;
841  double val;
842 
843  double C[3] = {Cn,0,Cp};
844 
845  int n = prob_col->n;
846  if (prob_col->use_bias)
847  n--;
848 
849  for(j=0; j<l; j++)
850  {
851  exp_wTx[j] = 1;
852  if(prob_col->y[j] > 0)
853  y[j] = 1;
854  else
855  y[j] = -1;
856  }
857  for(j=0; j<w_size; j++)
858  {
859  w.vector[j] = 0;
860  index[j] = j;
861  xj_max[j] = 0;
862  C_sum[j] = 0;
863  xjneg_sum[j] = 0;
864  xjpos_sum[j] = 0;
865 
866  if (use_bias && j==n)
867  {
868  for (ind=0; ind<l; ind++)
869  {
870  x_min = CMath::min(x_min, 1.0);
871  xj_max[j] = CMath::max(xj_max[j], 1.0);
872  C_sum[j] += C[GETI(ind)];
873  if(y[ind] == -1)
874  xjneg_sum[j] += C[GETI(ind)];
875  else
876  xjpos_sum[j] += C[GETI(ind)];
877  }
878  }
879  else
880  {
881  iterator=x->get_feature_iterator(j);
882  while (x->get_next_feature(ind, val, iterator))
883  {
884  x_min = CMath::min(x_min, val);
885  xj_max[j] = CMath::max(xj_max[j], val);
886  C_sum[j] += C[GETI(ind)];
887  if(y[ind] == -1)
888  xjneg_sum[j] += C[GETI(ind)]*val;
889  else
890  xjpos_sum[j] += C[GETI(ind)]*val;
891  }
892  x->free_feature_iterator(iterator);
893  }
894  }
895 
896  CTime start_time;
897  while (iter < max_iterations && !CSignal::cancel_computations())
898  {
899  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
900  break;
901 
902  Gmax_new = 0;
903 
904  for(j=0; j<active_size; j++)
905  {
906  int i = CMath::random(j, active_size-1);
907  CMath::swap(index[i], index[j]);
908  }
909 
910  for(s=0; s<active_size; s++)
911  {
912  j = index[s];
913  sum1 = 0;
914  sum2 = 0;
915  H = 0;
916 
917  if (use_bias && j==n)
918  {
919  for (ind=0; ind<l; ind++)
920  {
921  double exp_wTxind = exp_wTx[ind];
922  double tmp1 = 1.0/(1+exp_wTxind);
923  double tmp2 = C[GETI(ind)]*tmp1;
924  double tmp3 = tmp2*exp_wTxind;
925  sum2 += tmp2;
926  sum1 += tmp3;
927  H += tmp1*tmp3;
928  }
929  }
930  else
931  {
932  iterator=x->get_feature_iterator(j);
933  while (x->get_next_feature(ind, val, iterator))
934  {
935  double exp_wTxind = exp_wTx[ind];
936  double tmp1 = val/(1+exp_wTxind);
937  double tmp2 = C[GETI(ind)]*tmp1;
938  double tmp3 = tmp2*exp_wTxind;
939  sum2 += tmp2;
940  sum1 += tmp3;
941  H += tmp1*tmp3;
942  }
943  x->free_feature_iterator(iterator);
944  }
945 
946  G = -sum2 + xjneg_sum[j];
947 
948  double Gp = G+1;
949  double Gn = G-1;
950  double violation = 0;
951  if(w.vector[j] == 0)
952  {
953  if(Gp < 0)
954  violation = -Gp;
955  else if(Gn > 0)
956  violation = Gn;
957  else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
958  {
959  active_size--;
960  CMath::swap(index[s], index[active_size]);
961  s--;
962  continue;
963  }
964  }
965  else if(w.vector[j] > 0)
966  violation = fabs(Gp);
967  else
968  violation = fabs(Gn);
969 
970  Gmax_new = CMath::max(Gmax_new, violation);
971 
972  // obtain Newton direction d
973  if(Gp <= H*w.vector[j])
974  d = -Gp/H;
975  else if(Gn >= H*w.vector[j])
976  d = -Gn/H;
977  else
978  d = -w.vector[j];
979 
980  if(fabs(d) < 1.0e-12)
981  continue;
982 
983  d = CMath::min(CMath::max(d,-10.0),10.0);
984 
985  double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
986  int num_linesearch;
987  for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
988  {
989  cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;
990 
991  if(x_min >= 0)
992  {
993  double tmp = exp(d*xj_max[j]);
994  appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
995  appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
996  if(CMath::min(appxcond1,appxcond2) <= 0)
997  {
998  if (use_bias && j==n)
999  {
1000  for (ind=0; ind<l; ind++)
1001  exp_wTx[ind] *= exp(d);
1002  }
1003 
1004  else
1005  {
1006  iterator=x->get_feature_iterator(j);
1007  while (x->get_next_feature(ind, val, iterator))
1008  exp_wTx[ind] *= exp(d*val);
1009  x->free_feature_iterator(iterator);
1010  }
1011  break;
1012  }
1013  }
1014 
1015  cond += d*xjneg_sum[j];
1016 
1017  int i = 0;
1018 
1019  if (use_bias && j==n)
1020  {
1021  for (ind=0; ind<l; ind++)
1022  {
1023  double exp_dx = exp(d);
1024  exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1025  cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
1026  i++;
1027  }
1028  }
1029  else
1030  {
1031 
1032  iterator=x->get_feature_iterator(j);
1033  while (x->get_next_feature(ind, val, iterator))
1034  {
1035  double exp_dx = exp(d*val);
1036  exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1037  cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
1038  i++;
1039  }
1040  x->free_feature_iterator(iterator);
1041  }
1042 
1043  if(cond <= 0)
1044  {
1045  i = 0;
1046  if (use_bias && j==n)
1047  {
1048  for (ind=0; ind<l; ind++)
1049  {
1050  exp_wTx[ind] = exp_wTx_new[i];
1051  i++;
1052  }
1053  }
1054  else
1055  {
1056  iterator=x->get_feature_iterator(j);
1057  while (x->get_next_feature(ind, val, iterator))
1058  {
1059  exp_wTx[ind] = exp_wTx_new[i];
1060  i++;
1061  }
1062  x->free_feature_iterator(iterator);
1063  }
1064  break;
1065  }
1066  else
1067  {
1068  d *= 0.5;
1069  delta *= 0.5;
1070  }
1071  }
1072 
1073  w.vector[j] += d;
1074 
1075  // recompute exp_wTx[] if line search takes too many steps
1076  if(num_linesearch >= max_num_linesearch)
1077  {
1078  SG_INFO("#")
1079  for(int i=0; i<l; i++)
1080  exp_wTx[i] = 0;
1081 
1082  for(int i=0; i<w_size; i++)
1083  {
1084  if(w.vector[i]==0) continue;
1085 
1086  if (use_bias && i==n)
1087  {
1088  for (ind=0; ind<l; ind++)
1089  exp_wTx[ind] += w.vector[i];
1090  }
1091  else
1092  {
1093  iterator=x->get_feature_iterator(i);
1094  while (x->get_next_feature(ind, val, iterator))
1095  exp_wTx[ind] += w.vector[i]*val;
1096  x->free_feature_iterator(iterator);
1097  }
1098  }
1099 
1100  for(int i=0; i<l; i++)
1101  exp_wTx[i] = exp(exp_wTx[i]);
1102  }
1103  }
1104 
1105  if(iter == 0)
1106  Gmax_init = Gmax_new;
1107  iter++;
1108  SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)
1109 
1110  if(Gmax_new <= eps*Gmax_init)
1111  {
1112  if(active_size == w_size)
1113  break;
1114  else
1115  {
1116  active_size = w_size;
1117  Gmax_old = CMath::INFTY;
1118  continue;
1119  }
1120  }
1121 
1122  Gmax_old = Gmax_new;
1123  }
1124 
1125  SG_DONE()
1126  SG_INFO("optimization finished, #iter = %d\n", iter)
1127  if(iter >= max_iterations)
1128  SG_WARNING("\nWARNING: reaching max number of iterations\n")
1129 
1130  // calculate objective value
1131 
1132  double v = 0;
1133  int nnz = 0;
1134  for(j=0; j<w_size; j++)
1135  if(w.vector[j] != 0)
1136  {
1137  v += fabs(w.vector[j]);
1138  nnz++;
1139  }
1140  for(j=0; j<l; j++)
1141  if(y[j] == 1)
1142  v += C[GETI(j)]*log(1+1/exp_wTx[j]);
1143  else
1144  v += C[GETI(j)]*log(1+exp_wTx[j]);
1145 
1146  SG_INFO("Objective value = %lf\n", v)
1147  SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)
1148 
1149  SG_FREE(index);
1150  SG_FREE(y);
1151  SG_FREE(exp_wTx);
1152  SG_FREE(exp_wTx_new);
1153  SG_FREE(xj_max);
1154  SG_FREE(C_sum);
1155  SG_FREE(xjneg_sum);
1156  SG_FREE(xjpos_sum);
1157 }
1158 
1159 // A coordinate descent algorithm for
1160 // the dual of L2-regularized logistic regression problems
1161 //
1162 // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
1163 // s.t. 0 <= \alpha_i <= upper_bound_i,
1164 //
1165 // where Qij = yi yj xi^T xj and
1166 // upper_bound_i = Cp if y_i = 1
1167 // upper_bound_i = Cn if y_i = -1
1168 //
1169 // Given:
1170 // x, y, Cp, Cn
1171 // eps is the stopping tolerance
1172 //
1173 // solution will be put in w
1174 //
1175 // See Algorithm 5 of Yu et al., MLJ 2010
1176 
1177 #undef GETI
1178 #define GETI(i) (y[i]+1)
1179 // To support weights for instances, use GETI(i) (i)
1180 
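// alpha is stored pairwise: alpha[2*i] holds alpha_i and alpha[2*i+1] holds
// upper_bound[GETI(i)] - alpha_i. Each coordinate update solves the one-dimensional
// sub-problem g_t(z) below with a few inner Newton steps (Algorithm 5, Yu et al., MLJ 2010).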
1181 void CLibLinear::solve_l2r_lr_dual(SGVector<float64_t>& w, const liblinear_problem *prob, double eps, double Cp, double Cn)
1182 {
1183  int l = prob->l;
1184  int w_size = prob->n;
1185  int i, s, iter = 0;
1186  double *xTx = new double[l];
1187  int max_iter = 1000;
1188  int *index = new int[l];
1189  double *alpha = new double[2*l]; // store alpha and C - alpha
1190  int32_t *y = new int32_t[l];
1191  int max_inner_iter = 100; // for inner Newton
1192  double innereps = 1e-2;
1193  double innereps_min = CMath::min(1e-8, eps);
1194  double upper_bound[3] = {Cn, 0, Cp};
1195  double Gmax_init = 0;
1196 
1197  for(i=0; i<l; i++)
1198  {
1199  if(prob->y[i] > 0)
1200  {
1201  y[i] = +1;
1202  }
1203  else
1204  {
1205  y[i] = -1;
1206  }
1207  }
1208 
1209  // Initial alpha can be set here. Note that
1210  // 0 < alpha[i] < upper_bound[GETI(i)]
1211  // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
1212  for(i=0; i<l; i++)
1213  {
1214  alpha[2*i] = CMath::min(0.001*upper_bound[GETI(i)], 1e-8);
1215  alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
1216  }
1217 
1218  for(i=0; i<w_size; i++)
1219  w[i] = 0;
1220  for(i=0; i<l; i++)
1221  {
1222  xTx[i] = prob->x->dot(i, prob->x,i);
1223  prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, w_size);
1224 
1225  if (prob->use_bias)
1226  {
1227  w.vector[w_size]+=y[i]*alpha[2*i];
1228  xTx[i]+=1;
1229  }
1230  index[i] = i;
1231  }
1232 
1233  while (iter < max_iter)
1234  {
1235  for (i=0; i<l; i++)
1236  {
1237  int j = CMath::random(i, l-1);
1238  CMath::swap(index[i], index[j]);
1239  }
1240  int newton_iter = 0;
1241  double Gmax = 0;
1242  for (s=0; s<l; s++)
1243  {
1244  i = index[s];
1245  int32_t yi = y[i];
1246  double C = upper_bound[GETI(i)];
1247  double ywTx = 0, xisq = xTx[i];
1248 
1249  ywTx = prob->x->dense_dot(i, w.vector, w_size);
1250  if (prob->use_bias)
1251  ywTx+=w.vector[w_size];
1252 
1253  ywTx *= y[i];
1254  double a = xisq, b = ywTx;
1255 
1256  // Decide to minimize g_1(z) or g_2(z)
1257  int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1258  if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1259  {
1260  ind1 = 2*i+1;
1261  ind2 = 2*i;
1262  sign = -1;
1263  }
1264 
1265  // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
1266  double alpha_old = alpha[ind1];
1267  double z = alpha_old;
1268  if(C - z < 0.5 * C)
1269  z = 0.1*z;
1270  double gp = a*(z-alpha_old)+sign*b+CMath::log(z/(C-z));
1271  Gmax = CMath::max(Gmax, CMath::abs(gp));
1272 
1273  // Newton method on the sub-problem
1274  const double eta = 0.1; // xi in the paper
1275  int inner_iter = 0;
1276  while (inner_iter <= max_inner_iter)
1277  {
1278  if(fabs(gp) < innereps)
1279  break;
1280  double gpp = a + C/(C-z)/z;
1281  double tmpz = z - gp/gpp;
1282  if(tmpz <= 0)
1283  z *= eta;
1284  else // tmpz in (0, C)
1285  z = tmpz;
1286  gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1287  newton_iter++;
1288  inner_iter++;
1289  }
1290 
1291  if(inner_iter > 0) // update w
1292  {
1293  alpha[ind1] = z;
1294  alpha[ind2] = C-z;
1295 
1296  prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, w_size);
1297 
1298  if (prob->use_bias)
1299  w.vector[w_size]+=sign*(z-alpha_old)*yi;
1300  }
1301  }
1302 
1303  if(iter == 0)
1304  Gmax_init = Gmax;
1305  iter++;
1306 
1307  SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)
1308 
1309  if(Gmax < eps)
1310  break;
1311 
1312  if(newton_iter <= l/10)
1313  innereps = CMath::max(innereps_min, 0.1*innereps);
1314 
1315  }
1316 
1317  SG_DONE()
1318  SG_INFO("optimization finished, #iter = %d\n",iter)
1319  if (iter >= max_iter)
1320  SG_WARNING("reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n")
1321 
1322  // calculate objective value
1323 
1324  double v = 0;
1325  for(i=0; i<w_size; i++)
1326  v += w[i] * w[i];
1327  v *= 0.5;
1328  for(i=0; i<l; i++)
1329  v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1330  - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
1331  SG_INFO("Objective value = %lf\n", v)
1332 
1333  delete [] xTx;
1334  delete [] alpha;
1335  delete [] y;
1336  delete [] index;
1337 }
1338 
1339 
1340 void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
1341 {
1342  if (!m_labels)
1343  SG_ERROR("Please assign labels first!\n")
1344 
1345  int32_t num_labels=m_labels->get_num_labels();
1346 
1347  if (num_labels!=linear_term.vlen)
1348  {
1349  SG_ERROR("Number of labels (%d) does not match number"
1350  " of entries (%d) in linear term \n", num_labels,
1351  linear_term.vlen);
1352  }
1353 
1354  m_linear_term=linear_term;
1355 }
1356 
1357 SGVector<float64_t> CLibLinear::get_linear_term()
1358 {
1359  if (!m_linear_term.vlen || !m_linear_term.vector)
1360  SG_ERROR("Please assign linear term first!\n")
1361 
1362  return m_linear_term;
1363 }
1364 
1365 void CLibLinear::init_linear_term()
1366 {
1367  if (!m_labels)
1368  SG_ERROR("Please assign labels first!\n")
1369 
1370  m_linear_term=SGVector<float64_t>(m_labels->get_num_labels());
1371  SGVector<float64_t>::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
1372 }
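
A minimal, hypothetical usage sketch (not part of LibLinear.cpp): it relies on the three-argument constructor, set_liblinear_solver_type() and set_epsilon() seen above, while the header paths, the CDenseFeatures/CBinaryLabels types and the toy data are assumptions for a typical Shogun 6.x build.

// usage_sketch.cpp -- illustrative only; data and file name are made up
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/classifier/svm/LibLinear.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // four 2-dimensional points, two per class (one example per column)
    SGMatrix<float64_t> X(2, 4);
    X(0,0)=-2; X(1,0)=-1;  X(0,1)=-1; X(1,1)=-2;
    X(0,2)= 2; X(1,2)= 1;  X(0,3)= 1; X(1,3)= 2;

    SGVector<float64_t> y(4);
    y[0]=-1; y[1]=-1; y[2]=+1; y[3]=+1;

    CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(X);
    CBinaryLabels* labels=new CBinaryLabels(y);

    // C=1, then pick one of the LIBLINEAR_SOLVER_TYPE values handled in train_machine()
    CLibLinear* ll=new CLibLinear(1.0, feats, labels);
    ll->set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);
    ll->set_epsilon(1e-5);
    ll->train();

    CBinaryLabels* out=ll->apply_binary(feats);
    out->get_labels().display_vector("predictions");

    SG_UNREF(out);
    SG_UNREF(ll);
    exit_shogun();
    return 0;
}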