SHOGUN  4.2.0
slep_solver.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Sergey Lisitsyn
8  * Copyright (C) 2010-2012 Jun Liu, Jieping Ye
9  */
10
11
13 #ifdef USE_GPL_SHOGUN
20 #include <shogun/lib/Signal.h>
21
22 namespace shogun
23 {
24
25 double compute_regularizer(double* w, double lambda, double lambda2, int n_vecs, int n_feats,
26  int n_blocks, const slep_options& options)
27 {
28  double regularizer = 0.0;
29  switch (options.mode)
30  {
32  {
33  for (int i=0; i<n_feats; i++)
34  {
35  double w_row_norm = 0.0;
36  for (int t=0; t<n_blocks; t++)
37  w_row_norm += CMath::pow(w[i+t*n_feats],options.q);
38  regularizer += CMath::pow(w_row_norm,1.0/options.q);
39  }
40  regularizer *= lambda;
41  }
42  break;
44  {
45  for (int i=0; i<n_feats; i++)
46  {
47  double tree_norm = 0.0;
48
49  if (options.general)
50  tree_norm = general_treeNorm(w+i, n_blocks, n_blocks, options.G, options.ind_t, options.n_nodes);
51  else
52  tree_norm = treeNorm(w+i, n_blocks, n_blocks, options.ind_t, options.n_nodes);
53
54  regularizer += tree_norm;
55  }
56  regularizer *= lambda;
57  }
58  break;
59  case FEATURE_GROUP:
60  {
61  for (int t=0; t<n_blocks; t++)
62  {
63  double group_qpow_sum = 0.0;
64  int group_ind_start = options.ind[t];
65  int group_ind_end = options.ind[t+1];
66  for (int i=group_ind_start; i<group_ind_end; i++)
67  group_qpow_sum += CMath::pow(w[i], options.q);
68
69  regularizer += options.gWeight[t]*CMath::pow(group_qpow_sum, 1.0/options.q);
70  }
71  regularizer *= lambda;
72  }
73  break;
74  case FEATURE_TREE:
75  {
76  if (options.general)
77  regularizer = general_treeNorm(w, 1, n_feats, options.G, options.ind_t, options.n_nodes);
78  else
79  regularizer = treeNorm(w, 1, n_feats, options.ind_t, options.n_nodes);
80
81  regularizer *= lambda;
82  }
83  break;
84  case PLAIN:
85  {
86  for (int i=0; i<n_feats; i++)
87  regularizer += CMath::abs(w[i]);
88
89  regularizer *= lambda;
90  }
91  break;
92  case FUSED:
93  {
94  double l1 = 0.0;
95  for (int i=0; i<n_feats; i++)
96  l1 += CMath::abs(w[i]);
97  regularizer += lambda*l1;
98  double fuse = 0.0;
99  for (int i=1; i<n_feats; i++)
100  fuse += CMath::abs(w[i]-w[i-1]);
101  regularizer += lambda2*fuse;
102  }
103  break;
104  }
105  return regularizer;
106 };
107
108 double compute_lambda(
109  double* ATx,
110  double z,
111  CDotFeatures* features,
112  double* y,
113  int n_vecs, int n_feats,
114  int n_blocks,
115  const slep_options& options)
116 {
117  double lambda_max = 0.0;
118  if (z<0 || z>1)
119  SG_SERROR("z is not in range [0,1]")
120
121  double q_bar = 0.0;
122  if (options.q==1)
123  q_bar = CMath::ALMOST_INFTY;
124  else if (options.q>1e6)
125  q_bar = 1;
126  else
127  q_bar = options.q/(options.q-1);
128
129  SG_SINFO("q bar = %f \n",q_bar)
130
131  switch (options.mode)
132  {
135  {
136  for (int t=0; t<n_blocks; t++)
137  {
140
141  switch (options.loss)
142  {
143  case LOGISTIC:
144  {
145  double b = 0.0;
146  int m1 = 0, m2 = 0;
147  for (int i=0; i<n_vecs_task; i++)
148  {
150  m1++;
151  else
152  m2++;
153  }
154  for (int i=0; i<n_vecs_task; i++)
155  {
157  b = double(m1)/(m1+m2);
158  else
159  b = -double(m2)/(m1+m2);
160
162  }
163  }
164  break;
165  case LEAST_SQUARES:
166  {
167  for (int i=0; i<n_vecs_task; i++)
169  }
170  }
171  }
172  }
173  break;
174  case FEATURE_GROUP:
175  case FEATURE_TREE:
176  case PLAIN:
177  case FUSED:
178  {
179  switch (options.loss)
180  {
181  case LOGISTIC:
182  {
183  int m1 = 0, m2 = 0;
184  double b = 0.0;
185  for (int i=0; i<n_vecs; i++)
186  y[i]>0 ? m1++ : m2++;
187
188  SG_SDEBUG("# pos = %d , # neg = %d\n",m1,m2)
189
190  for (int i=0; i<n_vecs; i++)
191  {
192  y[i]>0 ? b=double(m2) / CMath::sq(n_vecs) : b=-double(m1) / CMath::sq(n_vecs);
194  }
195  }
196  break;
197  case LEAST_SQUARES:
198  {
199  for (int i=0; i<n_vecs; i++)
201  }
202  break;
203  }
204  }
205  break;
206  }
207
208  switch (options.mode)
209  {
211  {
212  for (int i=0; i<n_feats; i++)
213  {
214  double sum = 0.0;
215  for (int t=0; t<n_blocks; t++)
216  sum += CMath::pow(fabs(ATx[t*n_feats+i]),q_bar);
217  lambda_max =
218  CMath::max(lambda_max, CMath::pow(sum,1.0/q_bar));
219  }
220
221  if (options.loss==LOGISTIC)
222  lambda_max /= n_vecs;
223  }
224  break;
226  {
227  if (options.general)
228  lambda_max = general_findLambdaMax_mt(ATx, n_feats, n_blocks, options.G, options.ind_t, options.n_nodes);
229  else
230  lambda_max = findLambdaMax_mt(ATx, n_feats, n_blocks, options.ind_t, options.n_nodes);
231
232  lambda_max /= n_vecs*n_blocks;
233  }
234  break;
235  case FEATURE_GROUP:
236  {
237  for (int t=0; t<n_blocks; t++)
238  {
239  int group_ind_start = options.ind[t];
240  int group_ind_end = options.ind[t+1];
241  double sum = 0.0;
242  for (int i=group_ind_start; i<group_ind_end; i++)
243  sum += CMath::pow(fabs(ATx[i]),q_bar);
244
245  sum = CMath::pow(sum, 1.0/q_bar);
246  sum /= options.gWeight[t];
247  SG_SINFO("sum = %f\n",sum)
248  if (sum>lambda_max)
249  lambda_max = sum;
250  }
251  }
252  break;
253  case FEATURE_TREE:
254  {
255  if (options.general)
256  lambda_max = general_findLambdaMax(ATx, n_feats, options.G, options.ind_t, options.n_nodes);
257  else
258  lambda_max = findLambdaMax(ATx, n_feats, options.ind_t, options.n_nodes);
259  }
260  break;
261  case PLAIN:
262  case FUSED:
263  {
264  double max = 0.0;
265  for (int i=0; i<n_feats; i++)
266  {
267  if (CMath::abs(ATx[i]) > max)
268  max = CMath::abs(ATx[i]);
269  }
270  lambda_max = max;
271  }
272  break;
273  }
274
275  SG_SINFO("Computed lambda = %f * %f = %f\n",z,lambda_max,z*lambda_max)
276  return z*lambda_max;
277 }
278
279 void projection(double* w, double* v, int n_feats, int n_blocks, double lambda, double lambda2,
280  double L, double* z, double* z0, const slep_options& options)
281 {
282  switch (options.mode)
283  {
285  eppMatrix(w, v, n_feats, n_blocks, lambda/L, options.q);
286  break;
288  if (options.general)
289  general_altra_mt(w, v, n_feats, n_blocks, options.G, options.ind_t, options.n_nodes, lambda/L);
290  else
291  altra_mt(w, v, n_feats, n_blocks, options.ind_t, options.n_nodes, lambda/L);
292  break;
293  case FEATURE_GROUP:
294  eppVector(w, v, options.ind, n_blocks, n_feats, options.gWeight, lambda/L, options.q > 1e6 ? 1e6 : options.q);
295  break;
296  case FEATURE_TREE:
297  if (options.general)
298  general_altra(w, v, n_feats, options.G, options.ind_t, options.n_nodes, lambda/L);
299  else
300  altra(w, v, n_feats, options.ind_t, options.n_nodes, lambda/L);
301  break;
302  case PLAIN:
303  for (int i=0; i<n_feats; i++)
304  w[i] = CMath::sign(v[i])*CMath::max(0.0,CMath::abs(v[i])-lambda/L);
305  break;
306  case FUSED:
307  flsa(w,z,NULL,v,z0,lambda/L,lambda2/L,n_feats,1000,1e-8,1,6);
308  for (int i=0; i<n_feats; i++)
309  z0[i] = z[i];
310  break;
311  }
312
313 }
314
315 double search_point_gradient_and_objective(CDotFeatures* features, double* ATx, double* As,
316  double* sc, double* y, int n_vecs,
318  double* g, double* gc,
319  const slep_options& options)
320 {
321  double fun_s = 0.0;
322  //SG_SDEBUG("As=%f\n", CMath::dot(As,As,n_vecs))
324  switch (options.mode)
325  {
328  for (int t=0; t<n_tasks; t++)
329  {
332  switch (options.loss)
333  {
334  case LOGISTIC:
335  gc[t] = 0.0;
336  for (int i=0; i<n_vecs_task; i++)
337  {
339  double bb = CMath::max(aa,0.0);
340  fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/ n_vecs;
341  double prob = 1.0/(1.0+CMath::exp(aa));
342  double b = -y[task_idx[i]]*(1.0-prob) / n_vecs;
343  gc[t] += b;
345  }
346  break;
347  case LEAST_SQUARES:
348  for (int i=0; i<n_feats*n_tasks; i++)
349  g[i] = -ATx[i];
350  for (int i=0; i<n_vecs_task; i++)
352  break;
353  }
354  }
355  break;
356  case FEATURE_GROUP:
357  case FEATURE_TREE:
358  case PLAIN:
359  case FUSED:
360  switch (options.loss)
361  {
362  case LOGISTIC:
363  gc[0] = 0.0;
364
365  for (int i=0; i<n_vecs; i++)
366  {
367  double aa = -y[i]*(As[i]+sc[0]);
368  double bb = CMath::max(aa,0.0);
369  fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);
370  /*
371  if (y[i]>0)
372  fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)*pos_weight;
373  else
374  fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)*neg_weight;
375  */
376  double prob = 1.0/(1.0+CMath::exp(aa));
377  //double b = 0;
378  double b = -y[i]*(1.0-prob)/n_vecs;
379  /*
380  if (y[i]>0)
381  b = -y[i]*(1.0-prob)*pos_weight;
382  else
383  b = -y[i]*(1.0-prob)*neg_weight;
384  */
385  gc[0] += b;
387  }
388  fun_s /= n_vecs;
389  break;
390  case LEAST_SQUARES:
391  for (int i=0; i<n_feats; i++)
392  g[i] = -ATx[i];
393  for (int i=0; i<n_vecs; i++)
395  break;
396  }
397  break;
398  }
400
401  return fun_s;
402 }
403
404 slep_result_t slep_solver(
405  CDotFeatures* features,
406  double* y,
407  double z,
408  const slep_options& options)
409 {
410  int i,t;
411  int n_feats = features->get_dim_feature_space();
412  int n_vecs = features->get_num_vectors();
413  double lambda, beta;
414  double funcp = 0.0, func = 0.0;
415
416  int n_blocks = 0;
418
419  switch (options.mode)
420  {
425  break;
426  case FEATURE_GROUP:
427  case FEATURE_TREE:
429  n_blocks = options.n_feature_blocks;
430  break;
431  case PLAIN:
432  case FUSED:
434  n_blocks = 1;
435  break;
436  }
438  SG_SDEBUG("n_nodes = %d\n",options.n_nodes)
439
440  int iter = 1;
441  bool done = false;
443
444  double rsL2 = options.rsL2;
445
446  double* ATx = SG_CALLOC(double, n_feats*n_tasks);
447  if (options.regularization!=0)
448  {
449  lambda = compute_lambda(ATx, z, features, y, n_vecs, n_feats, n_blocks, options);
450  rsL2*= lambda;
451  }
452  else
453  lambda = z;
454
455  double lambda2 = 0.0;
456
458  w.zero();
460  c.zero();
461
462  if (options.last_result)
463  {
464  w = options.last_result->w;
465  c = options.last_result->c;
466  }
467
468  double* s = SG_CALLOC(double, n_feats*n_tasks);
469  double* sc = SG_CALLOC(double, n_tasks);
470  double* g = SG_CALLOC(double, n_feats*n_tasks);
471  double* v = SG_CALLOC(double, n_feats*n_tasks);
472  double* z_flsa = SG_CALLOC(double, n_feats);
473  double* z0_flsa = SG_CALLOC(double, n_feats);
474
475  double* Aw = SG_CALLOC(double, n_vecs);
476  switch (options.mode)
477  {
480  {
481  for (t=0; t<n_blocks; t++)
482  {
488  }
489  }
490  break;
491  case FEATURE_GROUP:
492  case FEATURE_TREE:
493  case PLAIN:
494  case FUSED:
495  {
496  for (i=0; i<n_vecs; i++)
497  Aw[i] = features->dense_dot(i,w.matrix,n_feats);
498  }
499  break;
500  }
501
502  double* Av = SG_MALLOC(double, n_vecs);
503  double* As = SG_MALLOC(double, n_vecs);
504
505  double L = 1.0/n_vecs;
506
507  if (options.mode==FUSED)
508  L += rsL2;
509
510  double* wp = SG_CALLOC(double, n_feats*n_tasks);
512  wp[i] = w[i];
513  double* Awp = SG_MALLOC(double, n_vecs);
514  for (i=0; i<n_vecs; i++)
515  Awp[i] = Aw[i];
516  double* wwp = SG_CALLOC(double, n_feats*n_tasks);
517
518  double* cp = SG_MALLOC(double, n_tasks);
520  cp[t] = c[t];
521  double* ccp = SG_CALLOC(double, n_tasks);
522
523  double* gc = SG_MALLOC(double, n_tasks);
524  double alphap = 0.0, alpha = 1.0;
525  double fun_x = 0.0;
526
527  while (!done && iter <= options.max_iter && !CSignal::cancel_computations())
528  {
529  beta = (alphap-1.0)/alpha;
530
532  s[i] = w[i] + beta*wwp[i];
534  sc[t] = c[t] + beta*ccp[t];
535  for (i=0; i<n_vecs; i++)
536  As[i] = Aw[i] + beta*(Aw[i]-Awp[i]);
538  g[i] = 0.0;
539
540  double fun_s = search_point_gradient_and_objective(features, ATx, As, sc, y, n_vecs, n_feats, n_tasks, g, gc, options);
541
542  //SG_SDEBUG("fun_s = %f\n", fun_s)
543
544  if (options.mode==PLAIN || options.mode==FUSED)
545  fun_s += rsL2/2 * CMath::dot(w.matrix,w.matrix,n_feats);
546
548  wp[i] = w[i];
550  cp[t] = c[t];
551  for (i=0; i<n_vecs; i++)
552  Awp[i] = Aw[i];
553
554  int inner_iter = 1;
555  while (inner_iter <= 1000)
556  {
558  v[i] = s[i] - g[i]*(1.0/L);
559
561  c[t] = sc[t] - gc[t]*(1.0/L);
562
563  projection(w.matrix,v,n_feats,n_blocks,lambda,lambda2,L,z_flsa,z0_flsa,options);
564
566  v[i] = w[i] - s[i];
567
568  fun_x = 0.0;
569  switch (options.mode)
570  {
573  for (t=0; t<n_blocks; t++)
574  {
578  {
580  if (options.loss==LOGISTIC)
581  {
583  double bb = CMath::max(aa,0.0);
584  fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);
585  }
586  }
587  }
588  break;
589  case FEATURE_GROUP:
590  case FEATURE_TREE:
591  case PLAIN:
592  case FUSED:
593  for (i=0; i<n_vecs; i++)
594  {
595  Aw[i] = features->dense_dot(i, w.matrix, n_feats);
596  if (options.loss==LOGISTIC)
597  {
598  double aa = -y[i]*(Aw[i]+c[0]);
599  double bb = CMath::max(aa,0.0);
600  if (y[i]>0)
601  fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);//*pos_weight;
602  else
603  fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);//*neg_weight;
604  }
605  }
606  break;
607  }
608  if (options.loss==LOGISTIC)
609  fun_x /= n_vecs;
610  if (options.mode==PLAIN || options.mode==FUSED)
611  fun_x += rsL2/2 * CMath::dot(w.matrix,w.matrix,n_feats);
612
613  double l_sum = 0.0, r_sum = 0.0;
614  switch (options.loss)
615  {
616  case LOGISTIC:
618  l_sum = fun_x - fun_s - CMath::dot(v,g,n_feats*n_tasks);
620  {
621  r_sum += CMath::sq(c[t] - sc[t]);
622  l_sum -= (c[t] - sc[t])*gc[t];
623  }
624  r_sum /= 2.0;
625  break;
626  case LEAST_SQUARES:
628  for (i=0; i<n_vecs; i++)
629  l_sum += CMath::sq(Aw[i]-As[i]);
630  break;
631  }
632
633  if (r_sum <= 1e-20)
634  {
636  break;
637  }
638
639  if (l_sum <= r_sum*L)
640  break;
641  else
642  L = CMath::max(2*L, l_sum/r_sum);
643  inner_iter++;
644  }
645
646  alphap = alpha;
647  alpha = 0.5*(1+CMath::sqrt(4*alpha*alpha+1));
649  wwp[i] = w[i] - wp[i];
651  ccp[t] = c[t] - cp[t];
652  double regularizer = compute_regularizer(w.matrix, lambda, lambda2, n_vecs, n_feats, n_blocks, options);
653  funcp = func;
654
655  if (options.loss==LOGISTIC)
656  {
657  func = fun_x + regularizer;
658  }
659  if (options.loss==LEAST_SQUARES)
660  {
661  func = regularizer;
662  for (i=0; i<n_vecs; i++)
663  func += CMath::sq(Aw[i] - y[i]);
664  }
665  SG_SDEBUG("Obj = %f + %f = %f \n",fun_x, regularizer, func)
666
668  {
669  SG_SINFO("Gradient norm is less than 1e-20\n")
670  break;
671  }
672
673  double norm_wp, norm_wwp;
674  double step;
675  switch (options.termination)
676  {
677  case 0:
678  if (iter>=2)
679  {
680  step = CMath::abs(func-funcp);
681  if (step <= options.tolerance)
682  {
683  SG_SINFO("Objective changes less than tolerance\n")
684  done = true;
685  }
686  }
687  break;
688  case 1:
689  if (iter>=2)
690  {
691  step = CMath::abs(func-funcp);
692  if (step <= step*options.tolerance)
693  {
694  SG_SINFO("Objective changes relatively less than tolerance\n")
695  done = true;
696  }
697  }
698  break;
699  case 2:
700  if (func <= options.tolerance)
701  {
702  SG_SINFO("Objective is less than tolerance\n")
703  done = true;
704  }
705  break;
706  case 3:
708  if (norm_wwp <= options.tolerance)
709  done = true;
710  break;
711  case 4:
714  if (norm_wwp <= options.tolerance*CMath::max(norm_wp,1.0))
715  done = true;
716  break;
717  default:
718  done = true;
719  }
720
721  iter++;
722  }
723  SG_SINFO("Finished %d iterations, objective = %f\n", iter, func)
724
725  SG_FREE(ATx);
726  SG_FREE(wp);
727  SG_FREE(wwp);
728  SG_FREE(s);
729  SG_FREE(sc);
730  SG_FREE(cp);
731  SG_FREE(ccp);
732  SG_FREE(g);
733  SG_FREE(v);
734  SG_FREE(Aw);
735  SG_FREE(Awp);
736  SG_FREE(Av);
737  SG_FREE(As);
738  SG_FREE(gc);
739  SG_FREE(z_flsa);
740  SG_FREE(z0_flsa);
741
742  return slep_result_t(w,c);
743 };
744 };
745
746 #endif //USE_GPL_SHOGUN
Vector::Scalar dot(Vector a, Vector b)
Definition: Redux.h:58
static T sq(T x)
Definition: Math.h:450
static T max(T a, T b)
Definition: Math.h:168
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
static bool cancel_computations()
Definition: Signal.h:86
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define SG_SDEBUG(...)
Definition: SGIO.h:168
static T sign(T a)
Definition: Math.h:426
#define SG_SERROR(...)
Definition: SGIO.h:179
static float64_t exp(float64_t x)
Definition: Math.h:621
#define SG_SINFO(...)
Definition: SGIO.h:173
static float64_t log(float64_t v)
Definition: Math.h:922
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68
static float32_t sqrt(float32_t x)
Definition: Math.h:459
static int32_t pow(bool x, int32_t n)
Definition: Math.h:535
static T abs(T a)
Definition: Math.h:179

SHOGUN Machine Learning Toolbox - Documentation