SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
libbmrm.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * libbmrm.h: Implementation of the BMRM solver for SO training
8  *
9  * Copyright (C) 2012 Michal Uricar, uricamic@cmp.felk.cvut.cz
10  *
11  * Implementation of the BMRM solver
12  *--------------------------------------------------------------------- */
13 
15 #include <shogun/lib/external/libqp.h>
16 #include <shogun/lib/Time.h>
17 #include <shogun/io/SGIO.h>
18 
19 #include <climits>
20 #include <limits>
21 
22 namespace shogun
23 {
24 static const uint32_t QPSolverMaxIter=0xFFFFFFFF;
25 static const float64_t epsilon=0.0;
26 
27 static float64_t *H;
28 uint32_t BufSize;
29 
31  bmrm_ll** tail,
32  bool* map,
33  float64_t* A,
34  uint32_t free_idx,
35  float64_t* cp_data,
36  uint32_t dim)
37 {
38  REQUIRE(map[free_idx],
39  "add_cutting_plane: CP index %u is not free\n", free_idx)
40 
41  LIBBMRM_MEMCPY(A+free_idx*dim, cp_data, dim*sizeof(float64_t));
42  map[free_idx]=false;
43 
45 
46  if (cp==NULL)
47  {
48  SG_SERROR("Out of memory.\n")
49  return;
50  }
51 
52  cp->address=A+(free_idx*dim);
53  cp->prev=*tail;
54  cp->next=NULL;
55  cp->idx=free_idx;
56  (*tail)->next=cp;
57  *tail=cp;
58 }
59 
61  bmrm_ll** head,
62  bmrm_ll** tail,
63  bool* map,
64  float64_t* icp)
65 {
66  bmrm_ll *cp_list_ptr=*head;
67 
68  while(cp_list_ptr->address != icp)
69  {
70  cp_list_ptr=cp_list_ptr->next;
71  }
72 
73  if (cp_list_ptr==*head)
74  {
75  *head=(*head)->next;
76  cp_list_ptr->next->prev=NULL;
77  }
78  else if (cp_list_ptr==*tail)
79  {
80  *tail=(*tail)->prev;
81  cp_list_ptr->prev->next=NULL;
82  }
83  else
84  {
85  cp_list_ptr->prev->next=cp_list_ptr->next;
86  cp_list_ptr->next->prev=cp_list_ptr->prev;
87  }
88 
89  map[cp_list_ptr->idx]=true;
90  LIBBMRM_FREE(cp_list_ptr);
91 }
92 
93 void clean_icp(ICP_stats* icp_stats,
94  BmrmStatistics& bmrm,
95  bmrm_ll** head,
96  bmrm_ll** tail,
97  float64_t*& Hmat,
98  float64_t*& diag_H,
99  float64_t*& beta,
100  bool*& map,
101  uint32_t cleanAfter,
102  float64_t*& b,
103  uint32_t*& I,
104  uint32_t cp_models
105  )
106 {
107  /* find ICP */
108  uint32_t cntICP=0;
109  uint32_t cntACP=0;
110  bmrm_ll* cp_ptr=*head;
111  uint32_t tmp_idx=0;
112 
113  while (cp_ptr != *tail)
114  {
115  if (icp_stats->ICPcounter[tmp_idx++]>=cleanAfter)
116  {
117  icp_stats->ICPs[cntICP++]=cp_ptr->address;
118  }
119  else
120  {
121  icp_stats->ACPs[cntACP++]=tmp_idx-1;
122  }
123 
124  cp_ptr=cp_ptr->next;
125  }
126 
127  /* do ICP removal */
128  if (cntICP > 0)
129  {
130  uint32_t nCP_new=bmrm.nCP-cntICP;
131 
132  for (uint32_t i=0; i<cntICP; ++i)
133  {
134  tmp_idx=0;
135  cp_ptr=*head;
136 
137  while(cp_ptr->address != icp_stats->ICPs[i])
138  {
139  cp_ptr=cp_ptr->next;
140  tmp_idx++;
141  }
142 
143  remove_cutting_plane(head, tail, map, icp_stats->ICPs[i]);
144 
145  LIBBMRM_MEMMOVE(b+tmp_idx, b+tmp_idx+1,
146  (bmrm.nCP+cp_models-tmp_idx)*sizeof(float64_t));
147  LIBBMRM_MEMMOVE(beta+tmp_idx, beta+tmp_idx+1,
148  (bmrm.nCP-tmp_idx)*sizeof(float64_t));
149  LIBBMRM_MEMMOVE(diag_H+tmp_idx, diag_H+tmp_idx+1,
150  (bmrm.nCP-tmp_idx)*sizeof(float64_t));
151  LIBBMRM_MEMMOVE(I+tmp_idx, I+tmp_idx+1,
152  (bmrm.nCP-tmp_idx)*sizeof(uint32_t));
153  LIBBMRM_MEMMOVE(icp_stats->ICPcounter+tmp_idx, icp_stats->ICPcounter+tmp_idx+1,
154  (bmrm.nCP-tmp_idx)*sizeof(uint32_t));
155  }
156 
157  /* H */
158  for (uint32_t i=0; i < nCP_new; ++i)
159  {
160  for (uint32_t j=0; j < nCP_new; ++j)
161  {
162  icp_stats->H_buff[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)]=
163  Hmat[LIBBMRM_INDEX(icp_stats->ACPs[i], icp_stats->ACPs[j], icp_stats->maxCPs)];
164  }
165  }
166 
167  for (uint32_t i=0; i<nCP_new; ++i)
168  for (uint32_t j=0; j<nCP_new; ++j)
169  Hmat[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)]=
170  icp_stats->H_buff[LIBBMRM_INDEX(i, j, icp_stats->maxCPs)];
171 
172  bmrm.nCP=nCP_new;
173  ASSERT(bmrm.nCP<BufSize);
174  }
175 }
176 
177 /*----------------------------------------------------------------------
178  Returns pointer at i-th column of Hessian matrix.
179  ----------------------------------------------------------------------*/
180 static const float64_t *get_col( uint32_t i)
181 {
182  return( &H[ BufSize*i ] );
183 }
184 
186  CDualLibQPBMSOSVM *machine,
187  float64_t* W,
188  float64_t TolRel,
189  float64_t TolAbs,
190  float64_t _lambda,
191  uint32_t _BufSize,
192  bool cleanICP,
193  uint32_t cleanAfter,
194  float64_t K,
195  uint32_t Tmax,
196  bool verbose)
197 {
198  BmrmStatistics bmrm;
199  libqp_state_T qp_exitflag={0, 0, 0, 0};
200  float64_t *b, *beta, *diag_H, *prevW;
201  float64_t R, *subgrad, *A, QPSolverTolRel, C=1.0, wdist=0.0;
202  floatmax_t rsum, sq_norm_W, sq_norm_Wdiff=0.0;
203  uint32_t *I;
204  uint8_t S=1;
205  CStructuredModel* model=machine->get_model();
206  uint32_t nDim=model->get_dim();
207  CSOSVMHelper* helper = NULL;
208 
209  CTime ttime;
210  float64_t tstart, tstop;
211 
212  bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_ptr2, *cp_list=NULL;
213  float64_t *A_1=NULL, *A_2=NULL;
214  bool *map=NULL;
215 
216 
217  tstart=ttime.cur_time_diff(false);
218 
219  BufSize=_BufSize;
220  QPSolverTolRel=1e-9;
221 
222  uint32_t histSize = BufSize;
223  H=NULL;
224  b=NULL;
225  beta=NULL;
226  A=NULL;
227  subgrad=NULL;
228  diag_H=NULL;
229  I=NULL;
230  prevW=NULL;
231 
232 
234 
235  if (H==NULL)
236  {
237  bmrm.exitflag=-2;
238  goto cleanup;
239  }
240 
241  ASSERT(nDim > 0);
242  ASSERT(BufSize > 0);
243  REQUIRE(BufSize < (std::numeric_limits<size_t>::max() / nDim),
244  "overflow: %u * %u > %u -- biggest possible BufSize=%u or nDim=%u\n",
245  BufSize, nDim, std::numeric_limits<size_t>::max(),
246  (std::numeric_limits<size_t>::max() / nDim),
247  (std::numeric_limits<size_t>::max() / BufSize));
248 
249  A= (float64_t*) LIBBMRM_CALLOC(size_t(nDim)*size_t(BufSize), float64_t);
250 
251  if (A==NULL)
252  {
253  bmrm.exitflag=-2;
254  goto cleanup;
255  }
256 
257  b= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
258 
259  if (b==NULL)
260  {
261  bmrm.exitflag=-2;
262  goto cleanup;
263  }
264 
265  beta= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
266 
267  if (beta==NULL)
268  {
269  bmrm.exitflag=-2;
270  goto cleanup;
271  }
272 
273  subgrad= (float64_t*) LIBBMRM_CALLOC(nDim, float64_t);
274 
275  if (subgrad==NULL)
276  {
277  bmrm.exitflag=-2;
278  goto cleanup;
279  }
280 
281  diag_H= (float64_t*) LIBBMRM_CALLOC(BufSize, float64_t);
282 
283  if (diag_H==NULL)
284  {
285  bmrm.exitflag=-2;
286  goto cleanup;
287  }
288 
289  I= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
290 
291  if (I==NULL)
292  {
293  bmrm.exitflag=-2;
294  goto cleanup;
295  }
296 
297  ICP_stats icp_stats;
298  icp_stats.maxCPs = BufSize;
299 
300  icp_stats.ICPcounter= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
301  if (icp_stats.ICPcounter==NULL)
302  {
303  bmrm.exitflag=-2;
304  goto cleanup;
305  }
306 
307  icp_stats.ICPs= (float64_t**) LIBBMRM_CALLOC(BufSize, float64_t*);
308  if (icp_stats.ICPs==NULL)
309  {
310  bmrm.exitflag=-2;
311  goto cleanup;
312  }
313 
314  icp_stats.ACPs= (uint32_t*) LIBBMRM_CALLOC(BufSize, uint32_t);
315  if (icp_stats.ACPs==NULL)
316  {
317  bmrm.exitflag=-2;
318  goto cleanup;
319  }
320 
321  /* Temporary buffers for ICP removal */
322  icp_stats.H_buff= (float64_t*) LIBBMRM_CALLOC(BufSize*BufSize, float64_t);
323  if (icp_stats.H_buff==NULL)
324  {
325  bmrm.exitflag=-2;
326  goto cleanup;
327  }
328 
329  map= (bool*) LIBBMRM_CALLOC(BufSize, bool);
330 
331  if (map==NULL)
332  {
333  bmrm.exitflag=-2;
334  goto cleanup;
335  }
336 
337  memset( (bool*) map, true, BufSize);
338 
339  cp_list= (bmrm_ll*) LIBBMRM_CALLOC(1, bmrm_ll);
340 
341  if (cp_list==NULL)
342  {
343  bmrm.exitflag=-2;
344  goto cleanup;
345  }
346 
347  prevW= (float64_t*) LIBBMRM_CALLOC(nDim, float64_t);
348 
349  if (prevW==NULL)
350  {
351  bmrm.exitflag=-2;
352  goto cleanup;
353  }
354 
355  bmrm.hist_Fp = SGVector< float64_t >(histSize);
356  bmrm.hist_Fd = SGVector< float64_t >(histSize);
357  bmrm.hist_wdist = SGVector< float64_t >(histSize);
358 
359  /* Iinitial solution */
360  R=machine->risk(subgrad, W);
361 
362  bmrm.nCP=0;
363  bmrm.nIter=0;
364  bmrm.exitflag=0;
365 
366  b[0]=-R;
367 
368  /* Cutting plane auxiliary double linked list */
369 
370  LIBBMRM_MEMCPY(A, subgrad, nDim*sizeof(float64_t));
371  map[0]=false;
372  cp_list->address=&A[0];
373  cp_list->idx=0;
374  cp_list->prev=NULL;
375  cp_list->next=NULL;
376  CPList_head=cp_list;
377  CPList_tail=cp_list;
378 
379  /* Compute initial value of Fp, Fd, assuming that W is zero vector */
380 
381  sq_norm_W=0;
382  bmrm.Fp=R+0.5*_lambda*sq_norm_W;
383  bmrm.Fd=-LIBBMRM_PLUS_INF;
384 
385  tstop=ttime.cur_time_diff(false);
386 
387  /* Verbose output */
388 
389  if (verbose)
390  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n",
391  bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, R);
392 
393  /* store Fp, Fd and wdist history */
394  bmrm.hist_Fp[0]=bmrm.Fp;
395  bmrm.hist_Fd[0]=bmrm.Fd;
396  bmrm.hist_wdist[0]=0.0;
397 
398  if (verbose)
399  helper = machine->get_helper();
400 
401  /* main loop */
402  ASSERT(bmrm.nCP<BufSize);
403  while (bmrm.exitflag==0)
404  {
405  tstart=ttime.cur_time_diff(false);
406  bmrm.nIter++;
407 
408  /* Update H */
409 
410  if (bmrm.nCP>0)
411  {
412  A_2=get_cutting_plane(CPList_tail);
413  cp_ptr=CPList_head;
414 
415  for (uint32_t i=0; i<bmrm.nCP; ++i)
416  {
417  A_1=get_cutting_plane(cp_ptr);
418  cp_ptr=cp_ptr->next;
419  rsum= SGVector<float64_t>::dot(A_1, A_2, nDim);
420 
421  H[LIBBMRM_INDEX(bmrm.nCP, i, BufSize)]
422  = H[LIBBMRM_INDEX(i, bmrm.nCP, BufSize)]
423  = rsum/_lambda;
424  }
425  }
426 
427  A_2=get_cutting_plane(CPList_tail);
428  rsum = SGVector<float64_t>::dot(A_2, A_2, nDim);
429 
430  H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)]=rsum/_lambda;
431 
432  diag_H[bmrm.nCP]=H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)];
433  I[bmrm.nCP]=1;
434 
435  beta[bmrm.nCP]=0.0; // [beta; 0]
436  bmrm.nCP++;
437  ASSERT(bmrm.nCP<BufSize);
438 
439 #if 0
440  /* TODO: scaling...*/
441  float64_t scale = SGVector<float64_t>::max(diag_H, BufSize)/(1000.0*_lambda);
442  SGVector<float64_t> sb(bmrm.nCP);
443  sb.zero();
444  sb.vec1_plus_scalar_times_vec2(sb.vector, 1/scale, b, bmrm.nCP);
445 
446  SGVector<float64_t> sh(bmrm.nCP);
447  sh.zero();
448  sb.vec1_plus_scalar_times_vec2(sh.vector, 1/scale, diag_H, bmrm.nCP);
449 
450  qp_exitflag =
451  libqp_splx_solver(&get_col, sh.vector, sb.vector, &C, I, &S, beta,
452  bmrm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
453 #else
454  /* call QP solver */
455  qp_exitflag=libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, beta,
456  bmrm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0);
457 #endif
458 
459  bmrm.qp_exitflag=qp_exitflag.exitflag;
460 
461  /* Update ICPcounter (add one to unused and reset used)
462  * + compute number of active CPs */
463  bmrm.nzA=0;
464 
465  for (uint32_t aaa=0; aaa<bmrm.nCP; ++aaa)
466  {
467  if (beta[aaa]>epsilon)
468  {
469  ++bmrm.nzA;
470  icp_stats.ICPcounter[aaa]=0;
471  }
472  else
473  {
474  icp_stats.ICPcounter[aaa]+=1;
475  }
476  }
477 
478  /* W update */
479  memset(W, 0, sizeof(float64_t)*nDim);
480  cp_ptr=CPList_head;
481  for (uint32_t j=0; j<bmrm.nCP; ++j)
482  {
483  A_1=get_cutting_plane(cp_ptr);
484  cp_ptr=cp_ptr->next;
485  SGVector<float64_t>::vec1_plus_scalar_times_vec2(W, -beta[j]/_lambda, A_1, nDim);
486  }
487 
488  /* risk and subgradient computation */
489  R = machine->risk(subgrad, W);
490  add_cutting_plane(&CPList_tail, map, A,
491  find_free_idx(map, BufSize), subgrad, nDim);
492 
493  sq_norm_W=SGVector<float64_t>::dot(W, W, nDim);
494  b[bmrm.nCP]=SGVector<float64_t>::dot(subgrad, W, nDim) - R;
495 
496  sq_norm_Wdiff=0.0;
497  for (uint32_t j=0; j<nDim; ++j)
498  {
499  sq_norm_Wdiff+=(W[j]-prevW[j])*(W[j]-prevW[j]);
500  }
501 
502  bmrm.Fp=R+0.5*_lambda*sq_norm_W;
503  bmrm.Fd=-qp_exitflag.QP;
504  wdist=CMath::sqrt(sq_norm_Wdiff);
505 
506  /* Stopping conditions */
507  if (bmrm.Fp - bmrm.Fd <= TolRel*LIBBMRM_ABS(bmrm.Fp))
508  bmrm.exitflag=1;
509 
510  if (bmrm.Fp - bmrm.Fd <= TolAbs)
511  bmrm.exitflag=2;
512 
513  tstop=ttime.cur_time_diff(false);
514 
515  /* Verbose output */
516  if (verbose)
517  SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d\n",
518  bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, bmrm.Fp-bmrm.Fd,
519  (bmrm.Fp-bmrm.Fd)/bmrm.Fp, R, bmrm.nCP, bmrm.nzA, qp_exitflag.exitflag);
520 
521  // iteration exceeds histSize
522  if (bmrm.nIter >= histSize)
523  {
524  histSize += BufSize;
525  bmrm.hist_Fp.resize_vector(histSize);
526  bmrm.hist_Fd.resize_vector(histSize);
527  bmrm.hist_wdist.resize_vector(histSize);
528  }
529 
530  /* Keep Fp, Fd and w_dist history */
531  ASSERT(bmrm.nIter < histSize);
532  bmrm.hist_Fp[bmrm.nIter]=bmrm.Fp;
533  bmrm.hist_Fd[bmrm.nIter]=bmrm.Fd;
534  bmrm.hist_wdist[bmrm.nIter]=wdist;
535 
536  /* keep W (for wdist history track) */
537  LIBBMRM_MEMCPY(prevW, W, nDim*sizeof(float64_t));
538 
539  /* Inactive Cutting Planes (ICP) removal */
540  if (cleanICP)
541  {
542  clean_icp(&icp_stats, bmrm, &CPList_head, &CPList_tail, H, diag_H, beta, map, cleanAfter, b, I);
543  ASSERT(bmrm.nCP<BufSize);
544  }
545 
546  // next CP would exceed BufSize
547  if (bmrm.nCP+1 >= BufSize)
548  bmrm.exitflag=-1;
549 
550  /* Debug: compute objective and training error */
551  if (verbose && SG_UNLIKELY(sg_io->loglevel_above(MSG_DEBUG)))
552  {
553  float64_t debug_tstart=ttime.cur_time_diff(false);
554 
555  SGVector<float64_t> w_debug(W, nDim, false);
556  float64_t primal = CSOSVMHelper::primal_objective(w_debug, model, _lambda);
557  float64_t train_error = CSOSVMHelper::average_loss(w_debug, model);
558  helper->add_debug_info(primal, bmrm.nIter, train_error);
559 
560  float64_t debug_tstop=ttime.cur_time_diff(false);
561  SG_SPRINT("%4d: add_debug_info: tim=%.3lf, primal=%.3lf, train_error=%lf\n",
562  bmrm.nIter, debug_tstop-debug_tstart, primal, train_error);
563  }
564 
565  } /* end of main loop */
566 
567  if (verbose)
568  {
569  helper->terminate();
570  SG_UNREF(helper);
571  }
572 
573  ASSERT(bmrm.nIter+1 <= histSize);
574  bmrm.hist_Fp.resize_vector(bmrm.nIter+1);
575  bmrm.hist_Fd.resize_vector(bmrm.nIter+1);
576  bmrm.hist_wdist.resize_vector(bmrm.nIter+1);
577 
578  cp_ptr=CPList_head;
579 
580  while(cp_ptr!=NULL)
581  {
582  cp_ptr2=cp_ptr;
583  cp_ptr=cp_ptr->next;
584  LIBBMRM_FREE(cp_ptr2);
585  cp_ptr2=NULL;
586  }
587 
588  cp_list=NULL;
589 
590 cleanup:
591 
592  LIBBMRM_FREE(H);
593  LIBBMRM_FREE(b);
594  LIBBMRM_FREE(beta);
595  LIBBMRM_FREE(A);
596  LIBBMRM_FREE(subgrad);
597  LIBBMRM_FREE(diag_H);
598  LIBBMRM_FREE(I);
599  LIBBMRM_FREE(icp_stats.ICPcounter);
600  LIBBMRM_FREE(icp_stats.ICPs);
601  LIBBMRM_FREE(icp_stats.ACPs);
602  LIBBMRM_FREE(icp_stats.H_buff);
603  LIBBMRM_FREE(map);
604  LIBBMRM_FREE(prevW);
605 
606  if (cp_list)
607  LIBBMRM_FREE(cp_list);
608 
609  SG_UNREF(model);
610 
611  return(bmrm);
612 }
613 }

SHOGUN Machine Learning Toolbox - Documentation