SHOGUN  5.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
libncbm.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Viktor Gal
8  *
9  */
10 
12 #ifdef USE_GPL_SHOGUN
13 #include <shogun/lib/Time.h>
15 
16 #include <shogun/lib/external/libqp.h>
18 
19 #include <vector>
20 
21 namespace shogun
22 {
23 
24 static float64_t* HMatrix;
25 static uint32_t maxCPs;
26 static const float64_t epsilon=0.0;
27 
28 static const float64_t *get_col(uint32_t i)
29 {
30  return (&HMatrix[maxCPs*i]);
31 }
32 
/** Result record of one line-search evaluation. */
IGNORE_IN_CLASSLIST struct line_search_res
{
	float64_t a;        /* step length along the search direction */
	float64_t fval;     /* objective at the step: risk + regularizer */
	float64_t reg;      /* regularizer value 0.5*lambda*||w||^2 at the step */
	SGVector<float64_t> solution;  /* primal point w at the step */
	SGVector<float64_t> gradient;  /* (sub)gradient at the step */
};
42 
/** Zoom phase of a strong-Wolfe line search
 * (cf. Nocedal &amp; Wright, Numerical Optimization, Algorithm 3.6).
 *
 * Shrinks the bracket [a_lo, a_hi] by cubic interpolation (with bisection
 * as a safeguard) until a step satisfying the strong Wolfe conditions is
 * found, the bracket collapses, or the iteration budget (5) is spent.
 *
 * @param machine           machine providing the risk()/subgradient oracle
 * @param lambda            regularization constant
 * @param a_lo, a_hi        current bracket of step lengths (may be reversed)
 * @param initial_fval      objective value at step length 0
 * @param initial_solution  starting point w
 * @param search_dir        search direction
 * @param wolfe_c1          sufficient-decrease (Armijo) constant
 * @param wolfe_c2          curvature-condition constant
 * @param init_lgrad        directional derivative at step length 0
 * @param f_lo, g_lo        objective / directional derivative at a_lo
 * @param f_hi, g_hi        objective / directional derivative at a_hi
 * @return step length, objective, regularizer, solution and gradient found
 */
inline static line_search_res zoom
(
	CDualLibQPBMSOSVM *machine,
	float64_t lambda,
	float64_t a_lo,
	float64_t a_hi,
	float64_t initial_fval,
	SGVector<float64_t>& initial_solution,
	SGVector<float64_t>& search_dir,
	float64_t wolfe_c1,
	float64_t wolfe_c2,
	float64_t init_lgrad,
	float64_t f_lo,
	float64_t g_lo,
	float64_t f_hi,
	float64_t g_hi
)
{
	line_search_res ls_res;
	ls_res.solution.resize_vector(initial_solution.vlen);
	ls_res.gradient.resize_vector(initial_solution.vlen);

	SGVector<float64_t> cur_solution(initial_solution.vlen);
	cur_solution.zero();
	SGVector<float64_t> cur_grad(initial_solution.vlen);

	uint32_t iter = 0;
	while (1)
	{
		/* cubic interpolation of a trial step a_j from the bracket data */
		float64_t d1 = g_lo+g_hi - 3*(f_lo-f_hi)/(a_lo-a_hi);
		float64_t d2 = CMath::sqrt(d1*d1 - g_lo*g_hi);
		float64_t a_j = a_hi -(a_hi-a_lo)*(g_hi+d2-d1)/(g_hi-g_lo+2*d2);

		/* safeguard: fall back to bisection when a_j leaves the bracket
		 * (the bracket may be stored in either order) */
		if (a_lo < a_hi)
		{
			if ((a_j < a_lo) || (a_j > a_hi))
			{
				a_j = 0.5*(a_lo+a_hi);
			}
		}
		else
		{
			if ((a_j > a_lo) || (a_j < a_hi))
			{
				a_j = 0.5*(a_lo+a_hi);
			}
		}

		/* trial point w = initial_solution + a_j*search_dir
		 * (add() overwrites cur_solution each iteration) */
		cur_solution.add(cur_solution.vector, 1.0,
				initial_solution.vector, a_j,
				search_dir.vector, cur_solution.vlen);

		/* objective = risk + 0.5*lambda*||w||^2 */
		float64_t cur_fval = machine->risk(cur_grad.vector, cur_solution.vector);
		float64_t cur_reg
			= 0.5*lambda*CMath::dot(cur_solution.vector,
					cur_solution.vector, cur_solution.vlen);
		cur_fval += cur_reg;

		/* full objective gradient: risk subgradient + lambda*w */
		cur_grad.vec1_plus_scalar_times_vec2(cur_grad.vector, lambda, cur_solution.vector, cur_grad.vlen);

		/* Armijo condition violated or no improvement over the low end
		 * => shrink the bracket from above */
		if
		(
			(cur_fval > (initial_fval+wolfe_c1*a_j*init_lgrad))
			||
			(cur_fval > f_lo)
		)
		{
			a_hi = a_j;
			f_hi = cur_fval;
			g_hi = CMath::dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);
		}
		else
		{
			float64_t cur_lgrad
				= CMath::dot(cur_grad.vector, search_dir.vector, cur_grad.vlen);

			/* strong-Wolfe curvature condition satisfied => accept a_j */
			if (CMath::abs(cur_lgrad) < -wolfe_c2*init_lgrad)
			{
				ls_res.a = a_j;
				ls_res.fval = cur_fval;
				ls_res.reg = cur_reg;
				ls_res.gradient = cur_grad;
				ls_res.solution = cur_solution;
//				SG_SPRINT("in zoom (wolfe2): %f\n", cur_fval)
				return ls_res;
			}

			/* keep the end whose derivative still brackets a minimum */
			if (cur_lgrad*(a_hi - a_lo) >= 0)
			{
				a_hi = a_lo;
				f_hi = f_lo;
				g_hi = g_lo;
			}
			a_lo = a_j;
			f_lo = cur_fval;
			g_lo = cur_lgrad;
		}

		/* bracket collapsed or iteration budget spent: return last trial */
		if
		(
			(CMath::abs(a_lo - a_hi) <= 0.01*a_lo)
			||
			(iter >= 5)
		)
		{
			ls_res.a = a_j;
			ls_res.fval = cur_fval;
			ls_res.reg = cur_reg;
			ls_res.gradient = cur_grad;
			ls_res.solution = cur_solution;
//			SG_SPRINT("in zoom iter: %d %f\n", iter, cur_fval)
			return ls_res;
		}
		iter++;
	}
}
159 
160 inline std::vector<line_search_res> line_search_with_strong_wolfe
161 (
162  CDualLibQPBMSOSVM *machine,
163  float64_t lambda,
164  float64_t initial_val,
165  SGVector<float64_t>& initial_solution,
166  SGVector<float64_t>& initial_grad,
167  SGVector<float64_t>& search_dir,
168  float64_t astart,
169  float64_t amax = 1.1,
170  float64_t wolfe_c1 = 1E-4,
171  float64_t wolfe_c2 = 0.9,
172  float64_t max_iter = 5
173 )
174 {
175  /* NOTE:
176  * model->risk returns only the risk as it's name says
177  * have to cur_fval = model.risk + (lambda*0.5*w*w')
178  *
179  * subgrad should be: subgrad + (lambda*w)
180  *
181  */
182 
183  uint32_t iter = 0;
184 
185  initial_grad.vec1_plus_scalar_times_vec2(initial_grad.vector, lambda, initial_solution.vector, initial_grad.vlen);
186 
187  float64_t initial_lgrad = CMath::dot(initial_grad.vector, search_dir.vector, initial_grad.vlen);
188  float64_t prev_lgrad = initial_lgrad;
189  float64_t prev_fval = initial_val;
190 
191  float64_t prev_a = 0;
192  float64_t cur_a = astart;
193 
194  std::vector<line_search_res> ret;
195  while (1)
196  {
197  SGVector<float64_t> x(initial_solution.vlen);
198  SGVector<float64_t> cur_subgrad(initial_solution.vlen);
199 
200  x.add(x.vector, 1.0, initial_solution.vector, cur_a, search_dir.vector, x.vlen);
201  float64_t cur_fval = machine->risk(cur_subgrad.vector, x.vector);
202  float64_t cur_reg = 0.5*lambda*CMath::dot(x.vector, x.vector, x.vlen);
203  cur_fval += cur_reg;
204 
205  cur_subgrad.vec1_plus_scalar_times_vec2(cur_subgrad.vector, lambda, x.vector, x.vlen);
206  if (iter == 0)
207  {
208  line_search_res initial_step;
209  initial_step.fval = cur_fval;
210  initial_step.reg = cur_reg;
211  initial_step.gradient = cur_subgrad;
212  initial_step.solution = x;
213  ret.push_back(initial_step);
214  }
215 
216  float64_t cur_lgrad
217  = CMath::dot(cur_subgrad.vector, search_dir.vector,
218  cur_subgrad.vlen);
219  if
220  (
221  (cur_fval > initial_val+wolfe_c1*cur_a*initial_lgrad)
222  ||
223  (cur_fval >= prev_fval && iter > 0)
224  )
225  {
226  ret.push_back(
227  zoom(machine, lambda, prev_a, cur_a, initial_val,
228  initial_solution, search_dir, wolfe_c1, wolfe_c2,
229  initial_lgrad, prev_fval, prev_lgrad, cur_fval, cur_lgrad)
230  );
231  return ret;
232  }
233 
234  if (CMath::abs(cur_lgrad) <= -wolfe_c2*initial_lgrad)
235  {
236  line_search_res ls_res;
237  ls_res.a = cur_a;
238  ls_res.fval = cur_fval;
239  ls_res.reg = cur_reg;
240  ls_res.solution = x;
241  ls_res.gradient = cur_subgrad;
242  ret.push_back(ls_res);
243  return ret;
244  }
245 
246  if (cur_lgrad >= 0)
247  {
248  ret.push_back(
249  zoom(machine, lambda, cur_a, prev_a, initial_val,
250  initial_solution, search_dir, wolfe_c1, wolfe_c2,
251  initial_lgrad, cur_fval, cur_lgrad, prev_fval, prev_lgrad)
252  );
253  return ret;
254  }
255  iter++;
256  if ((CMath::abs(cur_a - amax) <= 0.01*amax) || (iter >= max_iter))
257  {
258  line_search_res ls_res;
259  ls_res.a = cur_a;
260  ls_res.fval = cur_fval;
261  ls_res.reg = cur_reg;
262  ls_res.solution = x;
263  ls_res.gradient = cur_subgrad;
264  ret.push_back(ls_res);
265  return ret;
266  }
267 
268  prev_a = cur_a;
269  prev_fval = cur_fval;
270  prev_lgrad = cur_lgrad;
271 
272  cur_a = (cur_a + amax)*0.5;
273  }
274 }
275 
276 inline void update_H(BmrmStatistics& ncbm,
277  bmrm_ll* head,
278  bmrm_ll* tail,
279  SGMatrix<float64_t>& H,
280  SGVector<float64_t>& diag_H,
281  float64_t lambda,
282  uint32_t maxCP,
283  int32_t w_dim)
284 {
285  float64_t* a_2 = get_cutting_plane(tail);
286  bmrm_ll* cp_ptr=head;
287 
288  for (uint32_t i=0; i < ncbm.nCP; ++i)
289  {
290  float64_t* a_1 = get_cutting_plane(cp_ptr);
291  cp_ptr=cp_ptr->next;
292 
293  float64_t dot_val = CMath::dot(a_2, a_1, w_dim);
294 
295  H.matrix[LIBBMRM_INDEX(ncbm.nCP, i, maxCP)]
296  = H.matrix[LIBBMRM_INDEX(i, ncbm.nCP, maxCP)]
297  = dot_val/lambda;
298  }
299 
300  /* set the diagonal element, i.e. subgrad_i*subgrad_i' */
301  float64_t dot_val = CMath::dot(a_2, a_2, w_dim);
302  H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)]=dot_val/lambda;
303 
304  diag_H[ncbm.nCP]=H[LIBBMRM_INDEX(ncbm.nCP, ncbm.nCP, maxCP)];
305 
306  ncbm.nCP++;
307 }
308 
309 
310 BmrmStatistics svm_ncbm_solver(
311  CDualLibQPBMSOSVM *machine,
312  float64_t *w,
313  float64_t TolRel,
314  float64_t TolAbs,
315  float64_t _lambda,
316  uint32_t _BufSize,
317  bool cleanICP,
318  uint32_t cleanAfter,
319  bool is_convex,
320  bool line_search,
321  bool verbose
322  )
323 {
324  BmrmStatistics ncbm;
325  libqp_state_T qp_exitflag={0, 0, 0, 0};
326 
327  CStructuredModel* model = machine->get_model();
328  int32_t w_dim = model->get_dim();
329 
330  maxCPs = _BufSize;
331  BufSize = _BufSize;
332 
333  ncbm.nCP=0;
334  ncbm.nIter=0;
335  ncbm.exitflag=0;
336 
337  /* variables for timing the algorithm*/
338  CTime ttime;
339  float64_t tstart, tstop;
340  tstart=ttime.cur_time_diff(false);
341 
342  /* matrix of subgradiends */
343  SGMatrix<float64_t> A(w_dim, maxCPs);
344 
345  /* bias vector */
346  SGVector<float64_t> bias(maxCPs);
347  bias.zero();
348 
349  /* Matrix for storing H = A*A' */
350  SGMatrix<float64_t> H(maxCPs,maxCPs);
351  HMatrix = H.matrix;
352 
353  /* diag_H */
354  SGVector<float64_t> diag_H(maxCPs);
355  diag_H.zero();
356 
357  /* x the solution vector of the dual problem:
358  * 1/lambda x*H*x' - x*bias
359  */
360  SGVector<float64_t> x(maxCPs);
361  x.zero();
362 
363  /* for sum_{i in I_k} x[i] <= b[k] for all k such that S[k] == 1 */
364  float64_t b = 1.0;
365  uint8_t S = 1;
366  SGVector<uint32_t> I(maxCPs);
367  I.set_const(1);
368 
369  /* libqp paramteres */
370  uint32_t QPSolverMaxIter = 0xFFFFFFFF;
371  float64_t QPSolverTolRel = 1E-9;
372 
373  /* variables for maintaining inactive planes */
374  SGVector<bool> map(maxCPs);
375  map.set_const(true);
376  ICP_stats icp_stats;
377  icp_stats.maxCPs = maxCPs;
378  icp_stats.ICPcounter = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
379  icp_stats.ICPs = (float64_t**) LIBBMRM_CALLOC(maxCPs, float64_t*);
380  icp_stats.ACPs = (uint32_t*) LIBBMRM_CALLOC(maxCPs, uint32_t);
381  icp_stats.H_buff = (float64_t*) LIBBMRM_CALLOC(maxCPs*maxCPs,float64_t);
382  if
383  (
384  icp_stats.ICPcounter == NULL || icp_stats.ICPs == NULL
385  || icp_stats.ACPs == NULL || icp_stats.H_buff == NULL
386  )
387  {
388  ncbm.exitflag=-2;
389  LIBBMRM_FREE(icp_stats.ICPcounter);
390  LIBBMRM_FREE(icp_stats.ICPs);
391  LIBBMRM_FREE(icp_stats.ACPs);
392  LIBBMRM_FREE(icp_stats.H_buff);
393  return ncbm;
394  }
395 
396  /* best */
397  float64_t best_Fp = CMath::INFTY;
398  float64_t best_risk = CMath::INFTY;
399  SGVector<float64_t> best_w(w_dim);
400  best_w.zero();
401  SGVector<float64_t> best_subgrad(w_dim);
402  best_subgrad.zero();
403 
404  /* initial solution */
405  SGVector<float64_t> cur_subgrad(w_dim);
406  SGVector<float64_t> cur_w(w_dim);
407  memcpy(cur_w.vector, w, sizeof(float64_t)*w_dim);
408 
409  float64_t cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
410  bias[0] = -cur_risk;
411  best_Fp = 0.5*_lambda*CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen) + cur_risk;
412  best_risk = cur_risk;
413  memcpy(best_w.vector, cur_w.vector, sizeof(float64_t)*w_dim);
414  memcpy(best_subgrad.vector, cur_subgrad.vector, sizeof(float64_t)*w_dim);
415 
416  /* create a double-linked list over the A the subgrad matrix */
417  bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_list=NULL;
418  cp_list = (bmrm_ll*) SG_CALLOC(bmrm_ll, 1);
419  if (cp_list==NULL)
420  {
421  ncbm.exitflag=-2;
422  return ncbm;
423  }
424  /* save the subgradient */
425  memcpy(A.matrix, cur_subgrad.vector, sizeof(float64_t)*w_dim);
426  map[0] = false;
427  cp_list->address=&A[0];
428  cp_list->idx=0;
429  cp_list->prev=NULL;
430  cp_list->next=NULL;
431  CPList_head=cp_list;
432  CPList_tail=cp_list;
433 
434  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
435  tstop=ttime.cur_time_diff(false);
436  if (verbose)
437  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
438  ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, cur_risk);
439 
440  float64_t astar = 0.01;
441 
442  SG_SPRINT("clean icps: %d\n", cleanICP)
443  while (ncbm.exitflag==0)
444  {
445  tstart=ttime.cur_time_diff(false);
446  ncbm.nIter++;
447 
448  //diag_H.display_vector();
449  //bias.display_vector();
450 
451  /* solve the dual of the problem, namely:
452  *
453  */
454  qp_exitflag =
455  libqp_splx_solver(&get_col, diag_H.vector, bias.vector, &b, I.vector, &S, x.vector,
456  ncbm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
457 
458  ncbm.Fd = -qp_exitflag.QP;
459 
460  ncbm.qp_exitflag=qp_exitflag.exitflag;
461 
462  /* Update ICPcounter (add one to unused and reset used)
463  * + compute number of active CPs */
464  ncbm.nzA=0;
465 
466  for (uint32_t i=0; i < ncbm.nCP; ++i)
467  {
468  if (x[i] > epsilon)
469  {
470  /* cp was active => reset counter */
471  ++ncbm.nzA;
472  icp_stats.ICPcounter[i]=0;
473  }
474  else
475  {
476  icp_stats.ICPcounter[i]++;
477  }
478  }
479 
480  /* Inactive Cutting Planes (ICP) removal */
481  if (cleanICP)
482  {
483  clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
484  H.matrix, diag_H.vector, x.vector,
485  map.vector, cleanAfter, bias.vector, I.vector);
486  }
487 
488  /* calculate the new w
489  * w[i] = -1/lambda*A[i]*x[i]
490  */
491  cur_w.zero();
492  cp_ptr=CPList_head;
493  for (uint32_t i=0; i < ncbm.nCP; ++i)
494  {
495  float64_t* A_1 = get_cutting_plane(cp_ptr);
496  cp_ptr=cp_ptr->next;
497  SGVector<float64_t>::vec1_plus_scalar_times_vec2(cur_w.vector, -x[i]/_lambda, A_1, w_dim);
498  }
499 
500  bool calc_gap = false;
501  if (calc_gap)
502  {
503  SGVector<float64_t> scores(ncbm.nCP);
504  cp_ptr=CPList_head;
505 
506  for (uint32_t i=0; i < ncbm.nCP; ++i)
507  {
508  float64_t* a_1 = get_cutting_plane(cp_ptr);
509  cp_ptr = cp_ptr->next;
510  scores[i] = CMath::dot(cur_w.vector, a_1, w_dim);
511  }
512  scores.vec1_plus_scalar_times_vec2(scores.vector, -1.0, bias.vector, scores.vlen);
513 
514  float64_t w_norm = CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen);
515  float64_t PO = 0.5*_lambda*w_norm + CMath::max(scores.vector, scores.vlen);
516  float64_t QP_gap = PO - ncbm.Fd;
517 
518  SG_SPRINT("%4d: primal:%f dual:%f QP_gap:%f\n", ncbm.nIter, PO, ncbm.Fd, QP_gap)
519  }
520 
521  /* Stopping conditions */
522  if ((best_Fp - ncbm.Fd) <= TolRel*LIBBMRM_ABS(best_Fp))
523  ncbm.exitflag = 1;
524 
525  if ((best_Fp - ncbm.Fd) <= TolAbs)
526  ncbm.exitflag = 2;
527 
528  if (ncbm.nCP >= maxCPs)
529  ncbm.exitflag = -1;
530 
531  // next CP would exceed BufSize/maxCPs
532  if (ncbm.nCP+3 >= maxCPs)
533  ncbm.exitflag=-1;
534 
535  tstop=ttime.cur_time_diff(false);
536 
537  /* Verbose output */
538  if (verbose)
539  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d, best_fp=%f, gap=%f\n",
540  ncbm.nIter, tstop-tstart, ncbm.Fp, ncbm.Fd, ncbm.Fp-ncbm.Fd,
541  (ncbm.Fp-ncbm.Fd)/ncbm.Fp, cur_risk, ncbm.nCP, ncbm.nzA, qp_exitflag.exitflag, best_Fp, (best_Fp-ncbm.Fd)/best_Fp);
542 
543  if (ncbm.exitflag!=0)
544  break;
545 
546  std::vector<line_search_res> wbest_candidates;
547  if (!line_search)
548  {
549  cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
550 
551  add_cutting_plane(&CPList_tail, map, A.matrix,
552  find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
553 
554  bias[ncbm.nCP] = CMath::dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
555 
556  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
557 
558  // add as a new wbest candidate
559  line_search_res ls;
560  ls.fval = cur_risk+0.5*_lambda*CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen);
561  ls.solution = cur_w;
562  ls.gradient = cur_subgrad;
563 
564  wbest_candidates.push_back(ls);
565  }
566  else
567  {
568  tstart=ttime.cur_time_diff(false);
569  /* do line searching */
570  SGVector<float64_t> search_dir(w_dim);
571  search_dir.add(search_dir.vector, 1.0, cur_w.vector, -1.0, best_w.vector, w_dim);
572 
573  float64_t norm_dir = search_dir.twonorm(search_dir.vector, search_dir.vlen);
574  float64_t astart;
575  uint32_t cp_min_approx = 0;
576  if (cp_min_approx || (ncbm.nIter == 1))
577  {
578  astart = 1.0;
579  }
580  else
581  {
582  astart = CMath::min(astar/norm_dir,1.0);
583  if (astart == 0)
584  astart = 1.0;
585  }
586 
587  /* line search */
588  std::vector<line_search_res> ls_res
589  = line_search_with_strong_wolfe(machine, _lambda, best_Fp, best_w, best_subgrad, search_dir, astart);
590 
591  if (ls_res[0].fval != ls_res[1].fval)
592  {
593  ls_res[0].gradient.vec1_plus_scalar_times_vec2(ls_res[0].gradient.vector, -_lambda, ls_res[0].solution.vector, w_dim);
594 
595  add_cutting_plane(&CPList_tail, map, A.matrix,
596  find_free_idx(map, maxCPs), ls_res[0].gradient, w_dim);
597 
598  bias[ncbm.nCP]
599  = CMath::dot(ls_res[0].solution.vector, ls_res[0].gradient, w_dim)
600  - (ls_res[0].fval - ls_res[0].reg);
601 
602  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
603 
604  wbest_candidates.push_back(ls_res[0]);
605  }
606 
607  ls_res[1].gradient.vec1_plus_scalar_times_vec2(ls_res[1].gradient.vector, -_lambda, ls_res[1].solution.vector, w_dim);
608 
609  add_cutting_plane(&CPList_tail, map, A.matrix,
610  find_free_idx(map, maxCPs), ls_res[1].gradient.vector, w_dim);
611 
612  bias[ncbm.nCP]
613  = CMath::dot(ls_res[1].solution.vector, ls_res[1].gradient.vector, w_dim)
614  - (ls_res[1].fval - ls_res[1].reg);
615 
616  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
617 
618  wbest_candidates.push_back(ls_res[1]);
619 
620  if ((best_Fp <= ls_res[1].fval) && (astart != 1))
621  {
622  cur_risk = machine->risk(cur_subgrad.vector, cur_w.vector);
623 
624  add_cutting_plane(&CPList_tail, map, A.matrix,
625  find_free_idx(map, maxCPs), cur_subgrad.vector, w_dim);
626 
627  bias[ncbm.nCP]
628  = CMath::dot(cur_w.vector, cur_subgrad.vector, cur_w.vlen) - cur_risk;
629 
630  update_H(ncbm, CPList_head, CPList_tail, H, diag_H, _lambda, maxCPs, w_dim);
631 
632  /* add as a new wbest candidate */
633  line_search_res ls;
634  ls.fval = cur_risk+0.5*_lambda*CMath::dot(cur_w.vector, cur_w.vector, cur_w.vlen);
635  ls.solution = cur_w;
636  ls.gradient = cur_subgrad;
637  SG_SPRINT("%lf\n", ls.fval)
638 
639  wbest_candidates.push_back(ls);
640  }
641 
642  astar = ls_res[1].a * norm_dir;
643 
644  tstop=ttime.cur_time_diff(false);
645  SG_SPRINT("\t\tline search time: %.5lf\n", tstop-tstart)
646  }
647 
648  /* search for the best w among the new candidates */
649  if (verbose)
650  SG_SPRINT("\t searching for the best Fp:\n")
651  for (size_t i = 0; i < wbest_candidates.size(); i++)
652  {
653  if (verbose)
654  SG_SPRINT("\t\t %d fcurrent: %.16lf\n", i, wbest_candidates[i].fval)
655 
656  if (wbest_candidates[i].fval < best_Fp)
657  {
658  best_Fp = wbest_candidates[i].fval;
659  best_risk = wbest_candidates[i].fval - wbest_candidates[i].reg;
660  memcpy(best_w, wbest_candidates[i].solution.vector, sizeof(float64_t)*w_dim);
661  memcpy(best_subgrad.vector, wbest_candidates[i].gradient.vector, sizeof(float64_t)*w_dim);
662 
663  ncbm.Fp = best_Fp;
664 
665  if (verbose)
666  SG_SPRINT("\t\t new best norm: %f\n",
667  best_w.twonorm(best_w.vector, w_dim));
668  }
669 
670  if (!is_convex)
671  {
672  index_t cp_idx = ncbm.nCP-(wbest_candidates.size()-i);
673 
674  /* conflict */
675  float64_t score
676  = CMath::dot(best_w.vector,
677  wbest_candidates[i].gradient.vector, w_dim)
678  + (-1.0*bias[cp_idx]);
679  if (score > best_risk)
680  {
681  float64_t U
682  = best_risk
683  - CMath::dot(best_w.vector,
684  wbest_candidates[i].gradient.vector, w_dim);
685 
686  float64_t L
687  = best_Fp - wbest_candidates[i].reg
688  - CMath::dot(wbest_candidates[i].solution.vector,
689  wbest_candidates[i].gradient.vector, w_dim);
690 
691  if (verbose)
692  SG_SPRINT("CONFLICT Rbest=%.6lg score=%g L=%.6lg U=%.6lg\n", best_risk, score, L, U)
693  if (L <= U)
694  {
695  if (verbose)
696  SG_SPRINT("%.6lf < %.6lf => changing bias[%d]=%g\n", L, U, cp_idx, L)
697  bias[cp_idx]= -L;
698  }
699  else
700  {
701  wbest_candidates[i].gradient.zero();
702  SGVector<float64_t>::vec1_plus_scalar_times_vec2(wbest_candidates[i].gradient.vector, -_lambda, best_w.vector, w_dim);
703 
704  cp_ptr = CPList_tail;
705  for (size_t j = wbest_candidates.size()-1; i < j; --j)
706  {
707  cp_ptr = cp_ptr->prev;
708  SG_SPRINT("tail - %d\n (%d)", j, i)
709  }
710 
711  float64_t* cp = get_cutting_plane(cp_ptr);
712  LIBBMRM_MEMCPY(cp, wbest_candidates[i].gradient.vector, w_dim*sizeof(float64_t));
713 
714  /* update the corresponding column and row in H */
715  cp_ptr = CPList_head;
716  for (uint32_t j = 0; j < ncbm.nCP-1; ++j)
717  {
718  float64_t* a = get_cutting_plane(cp_ptr);
719  cp_ptr = cp_ptr->next;
720  float64_t dot_val
721  = CMath::dot(a, wbest_candidates[i].gradient.vector, w_dim);
722 
723  H.matrix[LIBBMRM_INDEX(cp_idx, j, maxCPs)]
724  = H.matrix[LIBBMRM_INDEX(j, cp_idx, maxCPs)]
725  = dot_val/_lambda;
726  }
727 
728  diag_H[LIBBMRM_INDEX(cp_idx, cp_idx, maxCPs)]
729  = CMath::dot(wbest_candidates[i].gradient.vector,
730  wbest_candidates[i].gradient.vector, w_dim);
731 
732 
733  bias[cp_idx]
734  = best_Fp - wbest_candidates[i].reg
735  - CMath::dot(wbest_candidates[i].solution.vector,
736  wbest_candidates[i].gradient.vector, w_dim);
737 
738  if (verbose)
739  SG_SPRINT("solved by changing nCP=%d bias:%g (%g)\n", cp_idx, bias[cp_idx], L)
740  }
741  }
742  }
743  }
744 
745  /* Inactive Cutting Planes (ICP) removal
746  if (cleanICP)
747  {
748  clean_icp(&icp_stats, ncbm, &CPList_head, &CPList_tail,
749  H.matrix, diag_H.vector, x.vector,
750  map.vector, cleanAfter, bias.vector, I.vector);
751  }
752  */
753  }
754 
755  memcpy(w, best_w.vector, sizeof(float64_t)*w_dim);
756 
757  /* free ICP_stats variables */
758  LIBBMRM_FREE(icp_stats.ICPcounter);
759  LIBBMRM_FREE(icp_stats.ICPs);
760  LIBBMRM_FREE(icp_stats.ACPs);
761  LIBBMRM_FREE(icp_stats.H_buff);
762 
763  cp_ptr=CPList_head;
764  while(cp_ptr!=NULL)
765  {
766  bmrm_ll * cp_ptr_this=cp_ptr;
767  cp_ptr=cp_ptr->next;
768  LIBBMRM_FREE(cp_ptr_this);
769  cp_ptr_this=NULL;
770  }
771 
772  SG_UNREF(model);
773 
774  return ncbm;
775 }
776 
777 } /* namespace shogun */
778 #endif //USE_GPL_SHOGUN
int32_t index_t
Definition: common.h:62
static const float64_t INFTY
infinity
Definition: Math.h:2048
#define SG_SPRINT(...)
Definition: SGIO.h:180
double float64_t
Definition: common.h:50
static T max(T a, T b)
Definition: Math.h:168
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
Definition: Math.h:627
static void vec1_plus_scalar_times_vec2(T *vec1, const T scalar, const T *vec2, int32_t n)
x=x+alpha*y
Definition: SGVector.cpp:529
#define SG_UNREF(x)
Definition: SGObject.h:55
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
#define IGNORE_IN_CLASSLIST
Definition: CPLEXSVM.h:24
static T min(T a, T b)
Definition: Math.h:157
static float32_t sqrt(float32_t x)
Definition: Math.h:459
static T abs(T a)
Definition: Math.h:179

SHOGUN Machine Learning Toolbox - Documentation