SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LibLinear.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2010 Soeren Sonnenburg
8  * Copyright (c) 2007-2009 The LIBLINEAR Project.
9  * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 #include <shogun/lib/config.h>
12 
13 #include <shogun/io/SGIO.h>
14 #include <shogun/lib/Signal.h>
15 #include <shogun/lib/Time.h>
16 #include <shogun/base/Parameter.h>
21 
22 using namespace shogun;
23 
26 {
27  init();
28 }
29 
32 {
33  init();
35 }
36 
38  float64_t C, CDotFeatures* traindat, CLabels* trainlab)
40 {
41  init();
42  C1=C;
43  C2=C;
44  use_bias=true;
45 
46  set_features(traindat);
47  set_labels(trainlab);
49 }
50 
51 void CLibLinear::init()
52 {
54  use_bias=false;
55  C1=1;
56  C2=1;
58  epsilon=1e-5;
59 
60  SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
61  SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
62  SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
64  SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
65  SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
67  SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
68  SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
69  "Type of LibLinear solver.", MS_NOT_AVAILABLE);
70 }
71 
73 {
74 }
75 
77 {
81 
82  if (data)
83  {
84  if (!data->has_property(FP_DOT))
85  SG_ERROR("Specified features are not of type CDotFeatures\n")
86 
87  set_features((CDotFeatures*) data);
88  }
90 
91 
92  int32_t num_train_labels=m_labels->get_num_labels();
93  int32_t num_feat=features->get_dim_feature_space();
94  int32_t num_vec=features->get_num_vectors();
95 
98  {
99  if (num_feat!=num_train_labels)
100  {
101  SG_ERROR("L1 methods require the data to be transposed: "
102  "number of features %d does not match number of "
103  "training labels %d\n",
104  num_feat, num_train_labels);
105  }
106  CMath::swap(num_feat, num_vec);
107 
108  }
109  else
110  {
111  if (num_vec!=num_train_labels)
112  {
113  SG_ERROR("number of vectors %d does not match "
114  "number of training labels %d\n",
115  num_vec, num_train_labels);
116  }
117  }
118  if (use_bias)
119  w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
120  else
121  w=SGVector<float64_t>(num_feat);
122 
123  liblinear_problem prob;
124  if (use_bias)
125  {
127  prob.n=w.vlen+1;
128  else
129  prob.n=w.vlen;
130  memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
131  }
132  else
133  {
134  prob.n=w.vlen;
135  memset(w.vector, 0, sizeof(float64_t)*(w.vlen+0));
136  }
137  prob.l=num_vec;
138  prob.x=features;
139  prob.y=SG_MALLOC(double, prob.l);
140  float64_t* Cs=SG_MALLOC(double, prob.l);
141  prob.use_bias=use_bias;
142  double Cp=C1;
143  double Cn=C2;
144 
145  for (int32_t i=0; i<prob.l; i++)
146  {
147  prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
148  if (prob.y[i] == +1)
149  Cs[i]=C1;
150  else if (prob.y[i] == -1)
151  Cs[i]=C2;
152  else
153  SG_ERROR("labels should be +1/-1 only\n")
154  }
155 
156  int pos = 0;
157  int neg = 0;
158  for(int i=0;i<prob.l;i++)
159  {
160  if(prob.y[i]==+1)
161  pos++;
162  }
163  neg = prob.l - pos;
164 
165  SG_INFO("%d training points %d dims\n", prob.l, prob.n)
166 
167  function *fun_obj=NULL;
168  switch (liblinear_solver_type)
169  {
170  case L2R_LR:
171  {
172  fun_obj=new l2r_lr_fun(&prob, Cs);
173  CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
174  SG_DEBUG("starting L2R_LR training via tron\n")
175  tron_obj.tron(w.vector, m_max_train_time);
176  SG_DEBUG("done with tron\n")
177  delete fun_obj;
178  break;
179  }
180  case L2R_L2LOSS_SVC:
181  {
182  fun_obj=new l2r_l2_svc_fun(&prob, Cs);
183  CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
184  tron_obj.tron(w.vector, m_max_train_time);
185  delete fun_obj;
186  break;
187  }
188  case L2R_L2LOSS_SVC_DUAL:
189  solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
190  break;
191  case L2R_L1LOSS_SVC_DUAL:
192  solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
193  break;
194  case L1R_L2LOSS_SVC:
195  {
196  //ASSUME FEATURES ARE TRANSPOSED ALREADY
197  solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
198  break;
199  }
200  case L1R_LR:
201  {
202  //ASSUME FEATURES ARE TRANSPOSED ALREADY
203  solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
204  break;
205  }
206  case L2R_LR_DUAL:
207  {
208  solve_l2r_lr_dual(&prob, epsilon, Cp, Cn);
209  break;
210  }
211  default:
212  SG_ERROR("Error: unknown solver_type\n")
213  break;
214  }
215 
216  if (use_bias)
217  set_bias(w[w.vlen]);
218  else
219  set_bias(0);
220 
221  SG_FREE(prob.y);
222  SG_FREE(Cs);
223 
224  return true;
225 }
226 
227 // A coordinate descent algorithm for
228 // L1-loss and L2-loss SVM dual problems
229 //
230 // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
231 // s.t. 0 <= alpha_i <= upper_bound_i,
232 //
233 // where Qij = yi yj xi^T xj and
234 // D is a diagonal matrix
235 //
236 // In L1-SVM case:
237 // upper_bound_i = Cp if y_i = 1
238 // upper_bound_i = Cn if y_i = -1
239 // D_ii = 0
240 // In L2-SVM case:
241 // upper_bound_i = INF
242 // D_ii = 1/(2*Cp) if y_i = 1
243 // D_ii = 1/(2*Cn) if y_i = -1
244 //
245 // Given:
246 // x, y, Cp, Cn
247 // eps is the stopping tolerance
248 //
249 // solution will be put in w
250 
251 #undef GETI
252 #define GETI(i) (y[i]+1)
253 // To support weights for instances, use GETI(i) (i)
254 
255 void CLibLinear::solve_l2r_l1l2_svc(
256  const liblinear_problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
257 {
258  int l = prob->l;
259  int w_size = prob->n;
260  int i, s, iter = 0;
261  double C, d, G;
262  double *QD = SG_MALLOC(double, l);
263  int *index = SG_MALLOC(int, l);
264  double *alpha = SG_MALLOC(double, l);
265  int32_t *y = SG_MALLOC(int32_t, l);
266  int active_size = l;
267 
268  // PG: projected gradient, for shrinking and stopping
269  double PG;
270  double PGmax_old = CMath::INFTY;
271  double PGmin_old = -CMath::INFTY;
272  double PGmax_new, PGmin_new;
273 
274  // default solver_type: L2R_L2LOSS_SVC_DUAL
275  double diag[3] = {0.5/Cn, 0, 0.5/Cp};
276  double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
277  if(st == L2R_L1LOSS_SVC_DUAL)
278  {
279  diag[0] = 0;
280  diag[2] = 0;
281  upper_bound[0] = Cn;
282  upper_bound[2] = Cp;
283  }
284 
285  int n = prob->n;
286 
287  if (prob->use_bias)
288  n--;
289 
290  for(i=0; i<w_size; i++)
291  w[i] = 0;
292 
293  for(i=0; i<l; i++)
294  {
295  alpha[i] = 0;
296  if(prob->y[i] > 0)
297  {
298  y[i] = +1;
299  }
300  else
301  {
302  y[i] = -1;
303  }
304  QD[i] = diag[GETI(i)];
305 
306  QD[i] += prob->x->dot(i, prob->x,i);
307  index[i] = i;
308  }
309 
310 
311  CTime start_time;
312  while (iter < max_iterations && !CSignal::cancel_computations())
313  {
314  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
315  break;
316 
317  PGmax_new = -CMath::INFTY;
318  PGmin_new = CMath::INFTY;
319 
320  for (i=0; i<active_size; i++)
321  {
322  int j = CMath::random(i, active_size-1);
323  CMath::swap(index[i], index[j]);
324  }
325 
326  for (s=0;s<active_size;s++)
327  {
328  i = index[s];
329  int32_t yi = y[i];
330 
331  G = prob->x->dense_dot(i, w.vector, n);
332  if (prob->use_bias)
333  G+=w.vector[n];
334 
335  if (m_linear_term.vector)
336  G = G*yi + m_linear_term.vector[i];
337  else
338  G = G*yi-1;
339 
340  C = upper_bound[GETI(i)];
341  G += alpha[i]*diag[GETI(i)];
342 
343  PG = 0;
344  if (alpha[i] == 0)
345  {
346  if (G > PGmax_old)
347  {
348  active_size--;
349  CMath::swap(index[s], index[active_size]);
350  s--;
351  continue;
352  }
353  else if (G < 0)
354  PG = G;
355  }
356  else if (alpha[i] == C)
357  {
358  if (G < PGmin_old)
359  {
360  active_size--;
361  CMath::swap(index[s], index[active_size]);
362  s--;
363  continue;
364  }
365  else if (G > 0)
366  PG = G;
367  }
368  else
369  PG = G;
370 
371  PGmax_new = CMath::max(PGmax_new, PG);
372  PGmin_new = CMath::min(PGmin_new, PG);
373 
374  if(fabs(PG) > 1.0e-12)
375  {
376  double alpha_old = alpha[i];
377  alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
378  d = (alpha[i] - alpha_old)*yi;
379 
380  prob->x->add_to_dense_vec(d, i, w.vector, n);
381 
382  if (prob->use_bias)
383  w.vector[n]+=d;
384  }
385  }
386 
387  iter++;
388  float64_t gap=PGmax_new - PGmin_new;
389  SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6)
390 
391  if(gap <= eps)
392  {
393  if(active_size == l)
394  break;
395  else
396  {
397  active_size = l;
398  PGmax_old = CMath::INFTY;
399  PGmin_old = -CMath::INFTY;
400  continue;
401  }
402  }
403  PGmax_old = PGmax_new;
404  PGmin_old = PGmin_new;
405  if (PGmax_old <= 0)
406  PGmax_old = CMath::INFTY;
407  if (PGmin_old >= 0)
408  PGmin_old = -CMath::INFTY;
409  }
410 
411  SG_DONE()
412  SG_INFO("optimization finished, #iter = %d\n",iter)
413  if (iter >= max_iterations)
414  {
415  SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster"
416  "(also see liblinear FAQ)\n\n");
417  }
418 
419  // calculate objective value
420 
421  double v = 0;
422  int nSV = 0;
423  for(i=0; i<w_size; i++)
424  v += w.vector[i]*w.vector[i];
425  for(i=0; i<l; i++)
426  {
427  v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
428  if(alpha[i] > 0)
429  ++nSV;
430  }
431  SG_INFO("Objective value = %lf\n",v/2)
432  SG_INFO("nSV = %d\n",nSV)
433 
434  SG_FREE(QD);
435  SG_FREE(alpha);
436  SG_FREE(y);
437  SG_FREE(index);
438 }
439 
440 // A coordinate descent algorithm for
441 // L1-regularized L2-loss support vector classification
442 //
443 // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
444 //
445 // Given:
446 // x, y, Cp, Cn
447 // eps is the stopping tolerance
448 //
449 // solution will be put in w
450 
451 #undef GETI
452 #define GETI(i) (y[i]+1)
453 // To support weights for instances, use GETI(i) (i)
454 
455 void CLibLinear::solve_l1r_l2_svc(
456  liblinear_problem *prob_col, double eps, double Cp, double Cn)
457 {
458  int l = prob_col->l;
459  int w_size = prob_col->n;
460  int j, s, iter = 0;
461  int active_size = w_size;
462  int max_num_linesearch = 20;
463 
464  double sigma = 0.01;
465  double d, G_loss, G, H;
466  double Gmax_old = CMath::INFTY;
467  double Gmax_new;
468  double Gmax_init=0;
469  double d_old, d_diff;
470  double loss_old=0, loss_new;
471  double appxcond, cond;
472 
473  int *index = SG_MALLOC(int, w_size);
474  int32_t *y = SG_MALLOC(int32_t, l);
475  double *b = SG_MALLOC(double, l); // b = 1-ywTx
476  double *xj_sq = SG_MALLOC(double, w_size);
477 
478  CDotFeatures* x = (CDotFeatures*) prob_col->x;
479  void* iterator;
480  int32_t ind;
481  float64_t val;
482 
483  double C[3] = {Cn,0,Cp};
484 
485  int n = prob_col->n;
486  if (prob_col->use_bias)
487  n--;
488 
489  for(j=0; j<l; j++)
490  {
491  b[j] = 1;
492  if(prob_col->y[j] > 0)
493  y[j] = 1;
494  else
495  y[j] = -1;
496  }
497 
498  for(j=0; j<w_size; j++)
499  {
500  w.vector[j] = 0;
501  index[j] = j;
502  xj_sq[j] = 0;
503 
504  if (use_bias && j==n)
505  {
506  for (ind=0; ind<l; ind++)
507  xj_sq[n] += C[GETI(ind)];
508  }
509  else
510  {
511  iterator=x->get_feature_iterator(j);
512  while (x->get_next_feature(ind, val, iterator))
513  xj_sq[j] += C[GETI(ind)]*val*val;
514  x->free_feature_iterator(iterator);
515  }
516  }
517 
518 
519  CTime start_time;
520  while (iter < max_iterations && !CSignal::cancel_computations())
521  {
522  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
523  break;
524 
525  Gmax_new = 0;
526 
527  for(j=0; j<active_size; j++)
528  {
529  int i = CMath::random(j, active_size-1);
530  CMath::swap(index[i], index[j]);
531  }
532 
533  for(s=0; s<active_size; s++)
534  {
535  j = index[s];
536  G_loss = 0;
537  H = 0;
538 
539  if (use_bias && j==n)
540  {
541  for (ind=0; ind<l; ind++)
542  {
543  if(b[ind] > 0)
544  {
545  double tmp = C[GETI(ind)]*y[ind];
546  G_loss -= tmp*b[ind];
547  H += tmp*y[ind];
548  }
549  }
550  }
551  else
552  {
553  iterator=x->get_feature_iterator(j);
554 
555  while (x->get_next_feature(ind, val, iterator))
556  {
557  if(b[ind] > 0)
558  {
559  double tmp = C[GETI(ind)]*val*y[ind];
560  G_loss -= tmp*b[ind];
561  H += tmp*val*y[ind];
562  }
563  }
564  x->free_feature_iterator(iterator);
565  }
566 
567  G_loss *= 2;
568 
569  G = G_loss;
570  H *= 2;
571  H = CMath::max(H, 1e-12);
572 
573  double Gp = G+1;
574  double Gn = G-1;
575  double violation = 0;
576  if(w.vector[j] == 0)
577  {
578  if(Gp < 0)
579  violation = -Gp;
580  else if(Gn > 0)
581  violation = Gn;
582  else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
583  {
584  active_size--;
585  CMath::swap(index[s], index[active_size]);
586  s--;
587  continue;
588  }
589  }
590  else if(w.vector[j] > 0)
591  violation = fabs(Gp);
592  else
593  violation = fabs(Gn);
594 
595  Gmax_new = CMath::max(Gmax_new, violation);
596 
597  // obtain Newton direction d
598  if(Gp <= H*w.vector[j])
599  d = -Gp/H;
600  else if(Gn >= H*w.vector[j])
601  d = -Gn/H;
602  else
603  d = -w.vector[j];
604 
605  if(fabs(d) < 1.0e-12)
606  continue;
607 
608  double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
609  d_old = 0;
610  int num_linesearch;
611  for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
612  {
613  d_diff = d_old - d;
614  cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;
615 
616  appxcond = xj_sq[j]*d*d + G_loss*d + cond;
617  if(appxcond <= 0)
618  {
619  if (use_bias && j==n)
620  {
621  for (ind=0; ind<l; ind++)
622  b[ind] += d_diff*y[ind];
623  break;
624  }
625  else
626  {
627  iterator=x->get_feature_iterator(j);
628  while (x->get_next_feature(ind, val, iterator))
629  b[ind] += d_diff*val*y[ind];
630 
631  x->free_feature_iterator(iterator);
632  break;
633  }
634  }
635 
636  if(num_linesearch == 0)
637  {
638  loss_old = 0;
639  loss_new = 0;
640 
641  if (use_bias && j==n)
642  {
643  for (ind=0; ind<l; ind++)
644  {
645  if(b[ind] > 0)
646  loss_old += C[GETI(ind)]*b[ind]*b[ind];
647  double b_new = b[ind] + d_diff*y[ind];
648  b[ind] = b_new;
649  if(b_new > 0)
650  loss_new += C[GETI(ind)]*b_new*b_new;
651  }
652  }
653  else
654  {
655  iterator=x->get_feature_iterator(j);
656  while (x->get_next_feature(ind, val, iterator))
657  {
658  if(b[ind] > 0)
659  loss_old += C[GETI(ind)]*b[ind]*b[ind];
660  double b_new = b[ind] + d_diff*val*y[ind];
661  b[ind] = b_new;
662  if(b_new > 0)
663  loss_new += C[GETI(ind)]*b_new*b_new;
664  }
665  x->free_feature_iterator(iterator);
666  }
667  }
668  else
669  {
670  loss_new = 0;
671  if (use_bias && j==n)
672  {
673  for (ind=0; ind<l; ind++)
674  {
675  double b_new = b[ind] + d_diff*y[ind];
676  b[ind] = b_new;
677  if(b_new > 0)
678  loss_new += C[GETI(ind)]*b_new*b_new;
679  }
680  }
681  else
682  {
683  iterator=x->get_feature_iterator(j);
684  while (x->get_next_feature(ind, val, iterator))
685  {
686  double b_new = b[ind] + d_diff*val*y[ind];
687  b[ind] = b_new;
688  if(b_new > 0)
689  loss_new += C[GETI(ind)]*b_new*b_new;
690  }
691  x->free_feature_iterator(iterator);
692  }
693  }
694 
695  cond = cond + loss_new - loss_old;
696  if(cond <= 0)
697  break;
698  else
699  {
700  d_old = d;
701  d *= 0.5;
702  delta *= 0.5;
703  }
704  }
705 
706  w.vector[j] += d;
707 
708  // recompute b[] if line search takes too many steps
709  if(num_linesearch >= max_num_linesearch)
710  {
711  SG_INFO("#")
712  for(int i=0; i<l; i++)
713  b[i] = 1;
714 
715  for(int i=0; i<n; i++)
716  {
717  if(w.vector[i]==0)
718  continue;
719 
720  iterator=x->get_feature_iterator(i);
721  while (x->get_next_feature(ind, val, iterator))
722  b[ind] -= w.vector[i]*val*y[ind];
723  x->free_feature_iterator(iterator);
724  }
725 
726  if (use_bias && w.vector[n])
727  {
728  for (ind=0; ind<l; ind++)
729  b[ind] -= w.vector[n]*y[ind];
730  }
731  }
732  }
733 
734  if(iter == 0)
735  Gmax_init = Gmax_new;
736  iter++;
737 
738  SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
739  -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);
740 
741  if(Gmax_new <= eps*Gmax_init)
742  {
743  if(active_size == w_size)
744  break;
745  else
746  {
747  active_size = w_size;
748  Gmax_old = CMath::INFTY;
749  continue;
750  }
751  }
752 
753  Gmax_old = Gmax_new;
754  }
755 
756  SG_DONE()
757  SG_INFO("optimization finished, #iter = %d\n", iter)
758  if(iter >= max_iterations)
759  SG_WARNING("\nWARNING: reaching max number of iterations\n")
760 
761  // calculate objective value
762 
763  double v = 0;
764  int nnz = 0;
765  for(j=0; j<w_size; j++)
766  {
767  if(w.vector[j] != 0)
768  {
769  v += fabs(w.vector[j]);
770  nnz++;
771  }
772  }
773  for(j=0; j<l; j++)
774  if(b[j] > 0)
775  v += C[GETI(j)]*b[j]*b[j];
776 
777  SG_INFO("Objective value = %lf\n", v)
778  SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)
779 
780  SG_FREE(index);
781  SG_FREE(y);
782  SG_FREE(b);
783  SG_FREE(xj_sq);
784 }
785 
786 // A coordinate descent algorithm for
787 // L1-regularized logistic regression problems
788 //
789 // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
790 //
791 // Given:
792 // x, y, Cp, Cn
793 // eps is the stopping tolerance
794 //
795 // solution will be put in w
796 
797 #undef GETI
798 #define GETI(i) (y[i]+1)
799 // To support weights for instances, use GETI(i) (i)
800 
801 void CLibLinear::solve_l1r_lr(
802  const liblinear_problem *prob_col, double eps,
803  double Cp, double Cn)
804 {
805  int l = prob_col->l;
806  int w_size = prob_col->n;
807  int j, s, iter = 0;
808  int active_size = w_size;
809  int max_num_linesearch = 20;
810 
811  double x_min = 0;
812  double sigma = 0.01;
813  double d, G, H;
814  double Gmax_old = CMath::INFTY;
815  double Gmax_new;
816  double Gmax_init=0;
817  double sum1, appxcond1;
818  double sum2, appxcond2;
819  double cond;
820 
821  int *index = SG_MALLOC(int, w_size);
822  int32_t *y = SG_MALLOC(int32_t, l);
823  double *exp_wTx = SG_MALLOC(double, l);
824  double *exp_wTx_new = SG_MALLOC(double, l);
825  double *xj_max = SG_MALLOC(double, w_size);
826  double *C_sum = SG_MALLOC(double, w_size);
827  double *xjneg_sum = SG_MALLOC(double, w_size);
828  double *xjpos_sum = SG_MALLOC(double, w_size);
829 
830  CDotFeatures* x = prob_col->x;
831  void* iterator;
832  int ind;
833  double val;
834 
835  double C[3] = {Cn,0,Cp};
836 
837  int n = prob_col->n;
838  if (prob_col->use_bias)
839  n--;
840 
841  for(j=0; j<l; j++)
842  {
843  exp_wTx[j] = 1;
844  if(prob_col->y[j] > 0)
845  y[j] = 1;
846  else
847  y[j] = -1;
848  }
849  for(j=0; j<w_size; j++)
850  {
851  w.vector[j] = 0;
852  index[j] = j;
853  xj_max[j] = 0;
854  C_sum[j] = 0;
855  xjneg_sum[j] = 0;
856  xjpos_sum[j] = 0;
857 
858  if (use_bias && j==n)
859  {
860  for (ind=0; ind<l; ind++)
861  {
862  x_min = CMath::min(x_min, 1.0);
863  xj_max[j] = CMath::max(xj_max[j], 1.0);
864  C_sum[j] += C[GETI(ind)];
865  if(y[ind] == -1)
866  xjneg_sum[j] += C[GETI(ind)];
867  else
868  xjpos_sum[j] += C[GETI(ind)];
869  }
870  }
871  else
872  {
873  iterator=x->get_feature_iterator(j);
874  while (x->get_next_feature(ind, val, iterator))
875  {
876  x_min = CMath::min(x_min, val);
877  xj_max[j] = CMath::max(xj_max[j], val);
878  C_sum[j] += C[GETI(ind)];
879  if(y[ind] == -1)
880  xjneg_sum[j] += C[GETI(ind)]*val;
881  else
882  xjpos_sum[j] += C[GETI(ind)]*val;
883  }
884  x->free_feature_iterator(iterator);
885  }
886  }
887 
888  CTime start_time;
889  while (iter < max_iterations && !CSignal::cancel_computations())
890  {
891  if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
892  break;
893 
894  Gmax_new = 0;
895 
896  for(j=0; j<active_size; j++)
897  {
898  int i = CMath::random(j, active_size-1);
899  CMath::swap(index[i], index[j]);
900  }
901 
902  for(s=0; s<active_size; s++)
903  {
904  j = index[s];
905  sum1 = 0;
906  sum2 = 0;
907  H = 0;
908 
909  if (use_bias && j==n)
910  {
911  for (ind=0; ind<l; ind++)
912  {
913  double exp_wTxind = exp_wTx[ind];
914  double tmp1 = 1.0/(1+exp_wTxind);
915  double tmp2 = C[GETI(ind)]*tmp1;
916  double tmp3 = tmp2*exp_wTxind;
917  sum2 += tmp2;
918  sum1 += tmp3;
919  H += tmp1*tmp3;
920  }
921  }
922  else
923  {
924  iterator=x->get_feature_iterator(j);
925  while (x->get_next_feature(ind, val, iterator))
926  {
927  double exp_wTxind = exp_wTx[ind];
928  double tmp1 = val/(1+exp_wTxind);
929  double tmp2 = C[GETI(ind)]*tmp1;
930  double tmp3 = tmp2*exp_wTxind;
931  sum2 += tmp2;
932  sum1 += tmp3;
933  H += tmp1*tmp3;
934  }
935  x->free_feature_iterator(iterator);
936  }
937 
938  G = -sum2 + xjneg_sum[j];
939 
940  double Gp = G+1;
941  double Gn = G-1;
942  double violation = 0;
943  if(w.vector[j] == 0)
944  {
945  if(Gp < 0)
946  violation = -Gp;
947  else if(Gn > 0)
948  violation = Gn;
949  else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
950  {
951  active_size--;
952  CMath::swap(index[s], index[active_size]);
953  s--;
954  continue;
955  }
956  }
957  else if(w.vector[j] > 0)
958  violation = fabs(Gp);
959  else
960  violation = fabs(Gn);
961 
962  Gmax_new = CMath::max(Gmax_new, violation);
963 
964  // obtain Newton direction d
965  if(Gp <= H*w.vector[j])
966  d = -Gp/H;
967  else if(Gn >= H*w.vector[j])
968  d = -Gn/H;
969  else
970  d = -w.vector[j];
971 
972  if(fabs(d) < 1.0e-12)
973  continue;
974 
975  d = CMath::min(CMath::max(d,-10.0),10.0);
976 
977  double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
978  int num_linesearch;
979  for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
980  {
981  cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;
982 
983  if(x_min >= 0)
984  {
985  double tmp = exp(d*xj_max[j]);
986  appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
987  appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
988  if(CMath::min(appxcond1,appxcond2) <= 0)
989  {
990  if (use_bias && j==n)
991  {
992  for (ind=0; ind<l; ind++)
993  exp_wTx[ind] *= exp(d);
994  }
995 
996  else
997  {
998  iterator=x->get_feature_iterator(j);
999  while (x->get_next_feature(ind, val, iterator))
1000  exp_wTx[ind] *= exp(d*val);
1001  x->free_feature_iterator(iterator);
1002  }
1003  break;
1004  }
1005  }
1006 
1007  cond += d*xjneg_sum[j];
1008 
1009  int i = 0;
1010 
1011  if (use_bias && j==n)
1012  {
1013  for (ind=0; ind<l; ind++)
1014  {
1015  double exp_dx = exp(d);
1016  exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1017  cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
1018  i++;
1019  }
1020  }
1021  else
1022  {
1023 
1024  iterator=x->get_feature_iterator(j);
1025  while (x->get_next_feature(ind, val, iterator))
1026  {
1027  double exp_dx = exp(d*val);
1028  exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1029  cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
1030  i++;
1031  }
1032  x->free_feature_iterator(iterator);
1033  }
1034 
1035  if(cond <= 0)
1036  {
1037  i = 0;
1038  if (use_bias && j==n)
1039  {
1040  for (ind=0; ind<l; ind++)
1041  {
1042  exp_wTx[ind] = exp_wTx_new[i];
1043  i++;
1044  }
1045  }
1046  else
1047  {
1048  iterator=x->get_feature_iterator(j);
1049  while (x->get_next_feature(ind, val, iterator))
1050  {
1051  exp_wTx[ind] = exp_wTx_new[i];
1052  i++;
1053  }
1054  x->free_feature_iterator(iterator);
1055  }
1056  break;
1057  }
1058  else
1059  {
1060  d *= 0.5;
1061  delta *= 0.5;
1062  }
1063  }
1064 
1065  w.vector[j] += d;
1066 
1067  // recompute exp_wTx[] if line search takes too many steps
1068  if(num_linesearch >= max_num_linesearch)
1069  {
1070  SG_INFO("#")
1071  for(int i=0; i<l; i++)
1072  exp_wTx[i] = 0;
1073 
1074  for(int i=0; i<w_size; i++)
1075  {
1076  if(w.vector[i]==0) continue;
1077 
1078  if (use_bias && i==n)
1079  {
1080  for (ind=0; ind<l; ind++)
1081  exp_wTx[ind] += w.vector[i];
1082  }
1083  else
1084  {
1085  iterator=x->get_feature_iterator(i);
1086  while (x->get_next_feature(ind, val, iterator))
1087  exp_wTx[ind] += w.vector[i]*val;
1088  x->free_feature_iterator(iterator);
1089  }
1090  }
1091 
1092  for(int i=0; i<l; i++)
1093  exp_wTx[i] = exp(exp_wTx[i]);
1094  }
1095  }
1096 
1097  if(iter == 0)
1098  Gmax_init = Gmax_new;
1099  iter++;
1100  SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)
1101 
1102  if(Gmax_new <= eps*Gmax_init)
1103  {
1104  if(active_size == w_size)
1105  break;
1106  else
1107  {
1108  active_size = w_size;
1109  Gmax_old = CMath::INFTY;
1110  continue;
1111  }
1112  }
1113 
1114  Gmax_old = Gmax_new;
1115  }
1116 
1117  SG_DONE()
1118  SG_INFO("optimization finished, #iter = %d\n", iter)
1119  if(iter >= max_iterations)
1120  SG_WARNING("\nWARNING: reaching max number of iterations\n")
1121 
1122  // calculate objective value
1123 
1124  double v = 0;
1125  int nnz = 0;
1126  for(j=0; j<w_size; j++)
1127  if(w.vector[j] != 0)
1128  {
1129  v += fabs(w.vector[j]);
1130  nnz++;
1131  }
1132  for(j=0; j<l; j++)
1133  if(y[j] == 1)
1134  v += C[GETI(j)]*log(1+1/exp_wTx[j]);
1135  else
1136  v += C[GETI(j)]*log(1+exp_wTx[j]);
1137 
1138  SG_INFO("Objective value = %lf\n", v)
1139  SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size)
1140 
1141  SG_FREE(index);
1142  SG_FREE(y);
1143  SG_FREE(exp_wTx);
1144  SG_FREE(exp_wTx_new);
1145  SG_FREE(xj_max);
1146  SG_FREE(C_sum);
1147  SG_FREE(xjneg_sum);
1148  SG_FREE(xjpos_sum);
1149 }
1150 
1151 // A coordinate descent algorithm for
1152 // the dual of L2-regularized logistic regression problems
1153 //
1154 // min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
1155 // s.t. 0 <= \alpha_i <= upper_bound_i,
1156 //
1157 // where Qij = yi yj xi^T xj and
1158 // upper_bound_i = Cp if y_i = 1
1159 // upper_bound_i = Cn if y_i = -1
1160 //
1161 // Given:
1162 // x, y, Cp, Cn
1163 // eps is the stopping tolerance
1164 //
1165 // solution will be put in w
1166 //
1167 // See Algorithm 5 of Yu et al., MLJ 2010
1168 
1169 #undef GETI
1170 #define GETI(i) (y[i]+1)
1171 // To support weights for instances, use GETI(i) (i)
1172 
1173 void CLibLinear::solve_l2r_lr_dual(const liblinear_problem *prob, double eps, double Cp, double Cn)
1174 {
1175  int l = prob->l;
1176  int w_size = prob->n;
1177  int i, s, iter = 0;
1178  double *xTx = new double[l];
1179  int max_iter = 1000;
1180  int *index = new int[l];
1181  double *alpha = new double[2*l]; // store alpha and C - alpha
1182  int32_t *y = new int32_t[l];
1183  int max_inner_iter = 100; // for inner Newton
1184  double innereps = 1e-2;
1185  double innereps_min = CMath::min(1e-8, eps);
1186  double upper_bound[3] = {Cn, 0, Cp};
1187  double Gmax_init = 0;
1188 
1189  for(i=0; i<l; i++)
1190  {
1191  if(prob->y[i] > 0)
1192  {
1193  y[i] = +1;
1194  }
1195  else
1196  {
1197  y[i] = -1;
1198  }
1199  }
1200 
1201  // Initial alpha can be set here. Note that
1202  // 0 < alpha[i] < upper_bound[GETI(i)]
1203  // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
1204  for(i=0; i<l; i++)
1205  {
1206  alpha[2*i] = CMath::min(0.001*upper_bound[GETI(i)], 1e-8);
1207  alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
1208  }
1209 
1210  for(i=0; i<w_size; i++)
1211  w[i] = 0;
1212  for(i=0; i<l; i++)
1213  {
1214  xTx[i] = prob->x->dot(i, prob->x,i);
1215  prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, w_size);
1216 
1217  if (prob->use_bias)
1218  {
1219  w.vector[w_size]+=y[i]*alpha[2*i];
1220  xTx[i]+=1;
1221  }
1222  index[i] = i;
1223  }
1224 
1225  while (iter < max_iter)
1226  {
1227  for (i=0; i<l; i++)
1228  {
1229  int j = CMath::random(i, l-1);
1230  CMath::swap(index[i], index[j]);
1231  }
1232  int newton_iter = 0;
1233  double Gmax = 0;
1234  for (s=0; s<l; s++)
1235  {
1236  i = index[s];
1237  int32_t yi = y[i];
1238  double C = upper_bound[GETI(i)];
1239  double ywTx = 0, xisq = xTx[i];
1240 
1241  ywTx = prob->x->dense_dot(i, w.vector, w_size);
1242  if (prob->use_bias)
1243  ywTx+=w.vector[w_size];
1244 
1245  ywTx *= y[i];
1246  double a = xisq, b = ywTx;
1247 
1248  // Decide to minimize g_1(z) or g_2(z)
1249  int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1250  if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1251  {
1252  ind1 = 2*i+1;
1253  ind2 = 2*i;
1254  sign = -1;
1255  }
1256 
1257  // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
1258  double alpha_old = alpha[ind1];
1259  double z = alpha_old;
1260  if(C - z < 0.5 * C)
1261  z = 0.1*z;
1262  double gp = a*(z-alpha_old)+sign*b+CMath::log(z/(C-z));
1263  Gmax = CMath::max(Gmax, CMath::abs(gp));
1264 
1265  // Newton method on the sub-problem
1266  const double eta = 0.1; // xi in the paper
1267  int inner_iter = 0;
1268  while (inner_iter <= max_inner_iter)
1269  {
1270  if(fabs(gp) < innereps)
1271  break;
1272  double gpp = a + C/(C-z)/z;
1273  double tmpz = z - gp/gpp;
1274  if(tmpz <= 0)
1275  z *= eta;
1276  else // tmpz in (0, C)
1277  z = tmpz;
1278  gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1279  newton_iter++;
1280  inner_iter++;
1281  }
1282 
1283  if(inner_iter > 0) // update w
1284  {
1285  alpha[ind1] = z;
1286  alpha[ind2] = C-z;
1287 
1288  prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, w_size);
1289 
1290  if (prob->use_bias)
1291  w.vector[w_size]+=sign*(z-alpha_old)*yi;
1292  }
1293  }
1294 
1295  if(iter == 0)
1296  Gmax_init = Gmax;
1297  iter++;
1298 
1299  SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6)
1300 
1301  if(Gmax < eps)
1302  break;
1303 
1304  if(newton_iter <= l/10)
1305  innereps = CMath::max(innereps_min, 0.1*innereps);
1306 
1307  }
1308 
1309  SG_DONE()
1310  SG_INFO("optimization finished, #iter = %d\n",iter)
1311  if (iter >= max_iter)
1312  SG_WARNING("reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n")
1313 
1314  // calculate objective value
1315 
1316  double v = 0;
1317  for(i=0; i<w_size; i++)
1318  v += w[i] * w[i];
1319  v *= 0.5;
1320  for(i=0; i<l; i++)
1321  v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1322  - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
1323  SG_INFO("Objective value = %lf\n", v)
1324 
1325  delete [] xTx;
1326  delete [] alpha;
1327  delete [] y;
1328  delete [] index;
1329 }
1330 
1331 
1333 {
1334  if (!m_labels)
1335  SG_ERROR("Please assign labels first!\n")
1336 
1337  int32_t num_labels=m_labels->get_num_labels();
1338 
1339  if (num_labels!=linear_term.vlen)
1340  {
1341  SG_ERROR("Number of labels (%d) does not match number"
1342  " of entries (%d) in linear term \n", num_labels,
1343  linear_term.vlen);
1344  }
1345 
1346  m_linear_term=linear_term;
1347 }
1348 
1350 {
1352  SG_ERROR("Please assign linear term first!\n")
1353 
1354  return m_linear_term;
1355 }
1356 
1358 {
1359  if (!m_labels)
1360  SG_ERROR("Please assign labels first!\n")
1361 
1364 }

SHOGUN Machine Learning Toolbox - Documentation